mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-07-09 03:04:10 -04:00
Some more fixes for the unicode type
The unicode type is now replaced in all dynamically loaded code (recipes, metadata sources, etc.). In the case of recipes, since they get compiled by calibre, we simply make the unicode/unichr names available, so there is no need for any changes to the actual recipes themselves.
This commit is contained in:
parent
6ad22b392b
commit
2d21a8efa2
@ -36,7 +36,7 @@ def merge():
|
|||||||
for child in svg.iterchildren('*'):
|
for child in svg.iterchildren('*'):
|
||||||
clone_node(child, symbol)
|
clone_node(child, symbol)
|
||||||
ans.append(symbol)
|
ans.append(symbol)
|
||||||
ans = etree.tostring(ans, encoding=unicode, pretty_print=True, with_tail=False)
|
ans = etree.tostring(ans, encoding='unicode', pretty_print=True, with_tail=False)
|
||||||
ans = re.sub('<svg[^>]+>', '<svg style="display:none">', ans, count=1)
|
ans = re.sub('<svg[^>]+>', '<svg style="display:none">', ans, count=1)
|
||||||
return ans
|
return ans
|
||||||
|
|
||||||
|
@ -168,7 +168,7 @@ def sort_languages(x):
|
|||||||
lc, name = x
|
lc, name = x
|
||||||
if lc == language:
|
if lc == language:
|
||||||
return ''
|
return ''
|
||||||
return sort_key(unicode(name))
|
return sort_key(type(u'')(name))
|
||||||
|
|
||||||
|
|
||||||
html_context['other_languages'].sort(key=sort_languages)
|
html_context['other_languages'].sort(key=sort_languages)
|
||||||
|
@ -198,7 +198,7 @@ def generate_ebook_convert_help(preamble, app):
|
|||||||
|
|
||||||
|
|
||||||
def update_cli_doc(name, raw, app):
|
def update_cli_doc(name, raw, app):
|
||||||
if isinstance(raw, unicode):
|
if isinstance(raw, type(u'')):
|
||||||
raw = raw.encode('utf-8')
|
raw = raw.encode('utf-8')
|
||||||
path = 'generated/%s/%s.rst' % (app.config.language, name)
|
path = 'generated/%s/%s.rst' % (app.config.language, name)
|
||||||
old_raw = open(path, 'rb').read() if os.path.exists(path) else ''
|
old_raw = open(path, 'rb').read() if os.path.exists(path) else ''
|
||||||
|
@ -21,6 +21,7 @@ prefs = JSONConfig('plugins/interface_demo')
|
|||||||
# Set defaults
|
# Set defaults
|
||||||
prefs.defaults['hello_world_msg'] = 'Hello, World!'
|
prefs.defaults['hello_world_msg'] = 'Hello, World!'
|
||||||
|
|
||||||
|
|
||||||
class ConfigWidget(QWidget):
|
class ConfigWidget(QWidget):
|
||||||
|
|
||||||
def __init__(self):
|
def __init__(self):
|
||||||
@ -37,5 +38,4 @@ class ConfigWidget(QWidget):
|
|||||||
self.label.setBuddy(self.msg)
|
self.label.setBuddy(self.msg)
|
||||||
|
|
||||||
def save_settings(self):
|
def save_settings(self):
|
||||||
prefs['hello_world_msg'] = unicode(self.msg.text())
|
prefs['hello_world_msg'] = self.msg.text()
|
||||||
|
|
||||||
|
@ -139,7 +139,7 @@ else:
|
|||||||
enc = preferred_encoding
|
enc = preferred_encoding
|
||||||
safe_encode = kwargs.get('safe_encode', False)
|
safe_encode = kwargs.get('safe_encode', False)
|
||||||
for i, arg in enumerate(args):
|
for i, arg in enumerate(args):
|
||||||
if isinstance(arg, unicode):
|
if isinstance(arg, type(u'')):
|
||||||
try:
|
try:
|
||||||
arg = arg.encode(enc)
|
arg = arg.encode(enc)
|
||||||
except UnicodeEncodeError:
|
except UnicodeEncodeError:
|
||||||
@ -150,8 +150,8 @@ else:
|
|||||||
try:
|
try:
|
||||||
arg = str(arg)
|
arg = str(arg)
|
||||||
except ValueError:
|
except ValueError:
|
||||||
arg = unicode(arg)
|
arg = type(u'')(arg)
|
||||||
if isinstance(arg, unicode):
|
if isinstance(arg, type(u'')):
|
||||||
try:
|
try:
|
||||||
arg = arg.encode(enc)
|
arg = arg.encode(enc)
|
||||||
except UnicodeEncodeError:
|
except UnicodeEncodeError:
|
||||||
|
@ -1795,41 +1795,41 @@ class UnicodeDammit:
|
|||||||
elif xml_data[:4] == '\x00\x3c\x00\x3f':
|
elif xml_data[:4] == '\x00\x3c\x00\x3f':
|
||||||
# UTF-16BE
|
# UTF-16BE
|
||||||
sniffed_xml_encoding = 'utf-16be'
|
sniffed_xml_encoding = 'utf-16be'
|
||||||
#xml_data = unicode(xml_data, 'utf-16be').encode('utf-8')
|
#xml_data = type(u'')(xml_data, 'utf-16be').encode('utf-8')
|
||||||
elif (len(xml_data) >= 4) and (xml_data[:2] == '\xfe\xff') \
|
elif (len(xml_data) >= 4) and (xml_data[:2] == '\xfe\xff') \
|
||||||
and (xml_data[2:4] != '\x00\x00'):
|
and (xml_data[2:4] != '\x00\x00'):
|
||||||
# UTF-16BE with BOM
|
# UTF-16BE with BOM
|
||||||
sniffed_xml_encoding = 'utf-16be'
|
sniffed_xml_encoding = 'utf-16be'
|
||||||
#xml_data = unicode(xml_data[2:], 'utf-16be').encode('utf-8')
|
#xml_data = type(u'')(xml_data[2:], 'utf-16be').encode('utf-8')
|
||||||
elif xml_data[:4] == '\x3c\x00\x3f\x00':
|
elif xml_data[:4] == '\x3c\x00\x3f\x00':
|
||||||
# UTF-16LE
|
# UTF-16LE
|
||||||
sniffed_xml_encoding = 'utf-16le'
|
sniffed_xml_encoding = 'utf-16le'
|
||||||
#xml_data = unicode(xml_data, 'utf-16le').encode('utf-8')
|
#xml_data = type(u'')(xml_data, 'utf-16le').encode('utf-8')
|
||||||
elif (len(xml_data) >= 4) and (xml_data[:2] == '\xff\xfe') and \
|
elif (len(xml_data) >= 4) and (xml_data[:2] == '\xff\xfe') and \
|
||||||
(xml_data[2:4] != '\x00\x00'):
|
(xml_data[2:4] != '\x00\x00'):
|
||||||
# UTF-16LE with BOM
|
# UTF-16LE with BOM
|
||||||
sniffed_xml_encoding = 'utf-16le'
|
sniffed_xml_encoding = 'utf-16le'
|
||||||
#xml_data = unicode(xml_data[2:], 'utf-16le').encode('utf-8')
|
#xml_data = type(u'')(xml_data[2:], 'utf-16le').encode('utf-8')
|
||||||
elif xml_data[:4] == '\x00\x00\x00\x3c':
|
elif xml_data[:4] == '\x00\x00\x00\x3c':
|
||||||
# UTF-32BE
|
# UTF-32BE
|
||||||
sniffed_xml_encoding = 'utf-32be'
|
sniffed_xml_encoding = 'utf-32be'
|
||||||
#xml_data = unicode(xml_data, 'utf-32be').encode('utf-8')
|
#xml_data = type(u'')(xml_data, 'utf-32be').encode('utf-8')
|
||||||
elif xml_data[:4] == '\x3c\x00\x00\x00':
|
elif xml_data[:4] == '\x3c\x00\x00\x00':
|
||||||
# UTF-32LE
|
# UTF-32LE
|
||||||
sniffed_xml_encoding = 'utf-32le'
|
sniffed_xml_encoding = 'utf-32le'
|
||||||
#xml_data = unicode(xml_data, 'utf-32le').encode('utf-8')
|
#xml_data = type(u'')(xml_data, 'utf-32le').encode('utf-8')
|
||||||
elif xml_data[:4] == '\x00\x00\xfe\xff':
|
elif xml_data[:4] == '\x00\x00\xfe\xff':
|
||||||
# UTF-32BE with BOM
|
# UTF-32BE with BOM
|
||||||
sniffed_xml_encoding = 'utf-32be'
|
sniffed_xml_encoding = 'utf-32be'
|
||||||
#xml_data = unicode(xml_data[4:], 'utf-32be').encode('utf-8')
|
#xml_data = type(u'')(xml_data[4:], 'utf-32be').encode('utf-8')
|
||||||
elif xml_data[:4] == '\xff\xfe\x00\x00':
|
elif xml_data[:4] == '\xff\xfe\x00\x00':
|
||||||
# UTF-32LE with BOM
|
# UTF-32LE with BOM
|
||||||
sniffed_xml_encoding = 'utf-32le'
|
sniffed_xml_encoding = 'utf-32le'
|
||||||
#xml_data = unicode(xml_data[4:], 'utf-32le').encode('utf-8')
|
#xml_data = type(u'')(xml_data[4:], 'utf-32le').encode('utf-8')
|
||||||
elif xml_data[:3] == '\xef\xbb\xbf':
|
elif xml_data[:3] == '\xef\xbb\xbf':
|
||||||
# UTF-8 with BOM
|
# UTF-8 with BOM
|
||||||
sniffed_xml_encoding = 'utf-8'
|
sniffed_xml_encoding = 'utf-8'
|
||||||
#xml_data = unicode(xml_data[3:], 'utf-8').encode('utf-8')
|
#xml_data = type(u'')(xml_data[3:], 'utf-8').encode('utf-8')
|
||||||
else:
|
else:
|
||||||
sniffed_xml_encoding = 'ascii'
|
sniffed_xml_encoding = 'ascii'
|
||||||
pass
|
pass
|
||||||
|
@ -93,7 +93,7 @@ def parse_details_page(url, log, timeout, browser, domain):
|
|||||||
errmsg = root.xpath('//*[@id="errorMessage"]')
|
errmsg = root.xpath('//*[@id="errorMessage"]')
|
||||||
if errmsg:
|
if errmsg:
|
||||||
msg = 'Failed to parse amazon details page: %r' % url
|
msg = 'Failed to parse amazon details page: %r' % url
|
||||||
msg += tostring(errmsg, method='text', encoding=unicode).strip()
|
msg += tostring(errmsg, method='text', encoding='unicode').strip()
|
||||||
log.error(msg)
|
log.error(msg)
|
||||||
return
|
return
|
||||||
|
|
||||||
@ -466,7 +466,7 @@ class Worker(Thread): # Get details {{{
|
|||||||
self.result_queue.put(mi)
|
self.result_queue.put(mi)
|
||||||
|
|
||||||
def totext(self, elem):
|
def totext(self, elem):
|
||||||
return self.tostring(elem, encoding=unicode, method='text').strip()
|
return self.tostring(elem, encoding='unicode', method='text').strip()
|
||||||
|
|
||||||
def parse_title(self, root):
|
def parse_title(self, root):
|
||||||
h1 = root.xpath('//h1[@id="title"]')
|
h1 = root.xpath('//h1[@id="title"]')
|
||||||
@ -478,10 +478,10 @@ class Worker(Thread): # Get details {{{
|
|||||||
tdiv = root.xpath('//h1[contains(@class, "parseasinTitle")]')[0]
|
tdiv = root.xpath('//h1[contains(@class, "parseasinTitle")]')[0]
|
||||||
actual_title = tdiv.xpath('descendant::*[@id="btAsinTitle"]')
|
actual_title = tdiv.xpath('descendant::*[@id="btAsinTitle"]')
|
||||||
if actual_title:
|
if actual_title:
|
||||||
title = self.tostring(actual_title[0], encoding=unicode,
|
title = self.tostring(actual_title[0], encoding='unicode',
|
||||||
method='text').strip()
|
method='text').strip()
|
||||||
else:
|
else:
|
||||||
title = self.tostring(tdiv, encoding=unicode,
|
title = self.tostring(tdiv, encoding='unicode',
|
||||||
method='text').strip()
|
method='text').strip()
|
||||||
ans = re.sub(r'[(\[].*[)\]]', '', title).strip()
|
ans = re.sub(r'[(\[].*[)\]]', '', title).strip()
|
||||||
if not ans:
|
if not ans:
|
||||||
@ -508,7 +508,7 @@ class Worker(Thread): # Get details {{{
|
|||||||
''')
|
''')
|
||||||
for x in aname:
|
for x in aname:
|
||||||
x.tail = ''
|
x.tail = ''
|
||||||
authors = [self.tostring(x, encoding=unicode, method='text').strip() for x
|
authors = [self.tostring(x, encoding='unicode', method='text').strip() for x
|
||||||
in aname]
|
in aname]
|
||||||
authors = [a for a in authors if a]
|
authors = [a for a in authors if a]
|
||||||
return authors
|
return authors
|
||||||
@ -559,7 +559,7 @@ class Worker(Thread): # Get details {{{
|
|||||||
for a in desc.xpath('descendant::a[@href]'):
|
for a in desc.xpath('descendant::a[@href]'):
|
||||||
del a.attrib['href']
|
del a.attrib['href']
|
||||||
a.tag = 'span'
|
a.tag = 'span'
|
||||||
desc = self.tostring(desc, method='html', encoding=unicode).strip()
|
desc = self.tostring(desc, method='html', encoding='unicode').strip()
|
||||||
|
|
||||||
# Encoding bug in Amazon data U+fffd (replacement char)
|
# Encoding bug in Amazon data U+fffd (replacement char)
|
||||||
# in some examples it is present in place of '
|
# in some examples it is present in place of '
|
||||||
@ -626,14 +626,14 @@ class Worker(Thread): # Get details {{{
|
|||||||
spans = series.xpath('./span')
|
spans = series.xpath('./span')
|
||||||
if spans:
|
if spans:
|
||||||
raw = self.tostring(
|
raw = self.tostring(
|
||||||
spans[0], encoding=unicode, method='text', with_tail=False).strip()
|
spans[0], encoding='unicode', method='text', with_tail=False).strip()
|
||||||
m = re.search(r'\s+([0-9.]+)$', raw.strip())
|
m = re.search(r'\s+([0-9.]+)$', raw.strip())
|
||||||
if m is not None:
|
if m is not None:
|
||||||
series_index = float(m.group(1))
|
series_index = float(m.group(1))
|
||||||
s = series.xpath('./a[@id="series-page-link"]')
|
s = series.xpath('./a[@id="series-page-link"]')
|
||||||
if s:
|
if s:
|
||||||
series = self.tostring(
|
series = self.tostring(
|
||||||
s[0], encoding=unicode, method='text', with_tail=False).strip()
|
s[0], encoding='unicode', method='text', with_tail=False).strip()
|
||||||
if series:
|
if series:
|
||||||
ans = (series, series_index)
|
ans = (series, series_index)
|
||||||
# This is found on Kindle edition pages on amazon.com
|
# This is found on Kindle edition pages on amazon.com
|
||||||
@ -646,7 +646,7 @@ class Worker(Thread): # Get details {{{
|
|||||||
a = span.xpath('./a[@href]')
|
a = span.xpath('./a[@href]')
|
||||||
if a:
|
if a:
|
||||||
series = self.tostring(
|
series = self.tostring(
|
||||||
a[0], encoding=unicode, method='text', with_tail=False).strip()
|
a[0], encoding='unicode', method='text', with_tail=False).strip()
|
||||||
if series:
|
if series:
|
||||||
ans = (series, series_index)
|
ans = (series, series_index)
|
||||||
# This is found on newer Kindle edition pages on amazon.com
|
# This is found on newer Kindle edition pages on amazon.com
|
||||||
@ -659,14 +659,14 @@ class Worker(Thread): # Get details {{{
|
|||||||
a = b.getparent().xpath('./a[@href]')
|
a = b.getparent().xpath('./a[@href]')
|
||||||
if a:
|
if a:
|
||||||
series = self.tostring(
|
series = self.tostring(
|
||||||
a[0], encoding=unicode, method='text', with_tail=False).partition('(')[0].strip()
|
a[0], encoding='unicode', method='text', with_tail=False).partition('(')[0].strip()
|
||||||
if series:
|
if series:
|
||||||
ans = series, series_index
|
ans = series, series_index
|
||||||
|
|
||||||
if ans == (None, None):
|
if ans == (None, None):
|
||||||
desc = root.xpath('//div[@id="ps-content"]/div[@class="buying"]')
|
desc = root.xpath('//div[@id="ps-content"]/div[@class="buying"]')
|
||||||
if desc:
|
if desc:
|
||||||
raw = self.tostring(desc[0], method='text', encoding=unicode)
|
raw = self.tostring(desc[0], method='text', encoding='unicode')
|
||||||
raw = re.sub(r'\s+', ' ', raw)
|
raw = re.sub(r'\s+', ' ', raw)
|
||||||
match = self.series_pat.search(raw)
|
match = self.series_pat.search(raw)
|
||||||
if match is not None:
|
if match is not None:
|
||||||
@ -1161,7 +1161,7 @@ class Amazon(Source):
|
|||||||
if not result_links:
|
if not result_links:
|
||||||
result_links = root.xpath(r'//li[starts-with(@id, "result_")]//a[@href and contains(@class, "s-access-detail-page")]')
|
result_links = root.xpath(r'//li[starts-with(@id, "result_")]//a[@href and contains(@class, "s-access-detail-page")]')
|
||||||
for a in result_links:
|
for a in result_links:
|
||||||
title = tostring(a, method='text', encoding=unicode)
|
title = tostring(a, method='text', encoding='unicode')
|
||||||
if title_ok(title):
|
if title_ok(title):
|
||||||
url = a.get('href')
|
url = a.get('href')
|
||||||
if url.startswith('/'):
|
if url.startswith('/'):
|
||||||
@ -1177,7 +1177,7 @@ class Amazon(Source):
|
|||||||
# New amazon markup
|
# New amazon markup
|
||||||
links = div.xpath('descendant::h3/a[@href]')
|
links = div.xpath('descendant::h3/a[@href]')
|
||||||
for a in links:
|
for a in links:
|
||||||
title = tostring(a, method='text', encoding=unicode)
|
title = tostring(a, method='text', encoding='unicode')
|
||||||
if title_ok(title):
|
if title_ok(title):
|
||||||
url = a.get('href')
|
url = a.get('href')
|
||||||
if url.startswith('/'):
|
if url.startswith('/'):
|
||||||
@ -1192,7 +1192,7 @@ class Amazon(Source):
|
|||||||
for td in root.xpath(
|
for td in root.xpath(
|
||||||
r'//div[@id="Results"]/descendant::td[starts-with(@id, "search:Td:")]'):
|
r'//div[@id="Results"]/descendant::td[starts-with(@id, "search:Td:")]'):
|
||||||
for a in td.xpath(r'descendant::td[@class="dataColumn"]/descendant::a[@href]/span[@class="srTitle"]/..'):
|
for a in td.xpath(r'descendant::td[@class="dataColumn"]/descendant::a[@href]/span[@class="srTitle"]/..'):
|
||||||
title = tostring(a, method='text', encoding=unicode)
|
title = tostring(a, method='text', encoding='unicode')
|
||||||
if title_ok(title):
|
if title_ok(title):
|
||||||
url = a.get('href')
|
url = a.get('href')
|
||||||
if url.startswith('/'):
|
if url.startswith('/'):
|
||||||
|
@ -99,7 +99,7 @@ def main(args=sys.argv):
|
|||||||
log = buf.getvalue()
|
log = buf.getvalue()
|
||||||
|
|
||||||
result = (metadata_to_opf(result) if opts.opf else
|
result = (metadata_to_opf(result) if opts.opf else
|
||||||
unicode(result).encode('utf-8'))
|
type(u'')(result).encode('utf-8'))
|
||||||
|
|
||||||
if opts.verbose:
|
if opts.verbose:
|
||||||
print (log, file=sys.stderr)
|
print (log, file=sys.stderr)
|
||||||
|
@ -203,7 +203,7 @@ class Douban(Source):
|
|||||||
build_term('author', author_tokens))
|
build_term('author', author_tokens))
|
||||||
t = 'search'
|
t = 'search'
|
||||||
q = q.strip()
|
q = q.strip()
|
||||||
if isinstance(q, unicode):
|
if isinstance(q, type(u'')):
|
||||||
q = q.encode('utf-8')
|
q = q.encode('utf-8')
|
||||||
if not q:
|
if not q:
|
||||||
return None
|
return None
|
||||||
|
@ -31,7 +31,7 @@ def parse_html(raw):
|
|||||||
|
|
||||||
def astext(node):
|
def astext(node):
|
||||||
from lxml import etree
|
from lxml import etree
|
||||||
return etree.tostring(node, method='text', encoding=unicode,
|
return etree.tostring(node, method='text', encoding='unicode',
|
||||||
with_tail=False).strip()
|
with_tail=False).strip()
|
||||||
|
|
||||||
|
|
||||||
@ -110,7 +110,7 @@ class Worker(Thread): # {{{
|
|||||||
for a in desc.xpath('descendant::a[@href]'):
|
for a in desc.xpath('descendant::a[@href]'):
|
||||||
del a.attrib['href']
|
del a.attrib['href']
|
||||||
a.tag = 'span'
|
a.tag = 'span'
|
||||||
desc = etree.tostring(desc, method='html', encoding=unicode).strip()
|
desc = etree.tostring(desc, method='html', encoding='unicode').strip()
|
||||||
|
|
||||||
# remove all attributes from tags
|
# remove all attributes from tags
|
||||||
desc = re.sub(r'<([a-zA-Z0-9]+)\s[^>]+>', r'<\1>', desc)
|
desc = re.sub(r'<([a-zA-Z0-9]+)\s[^>]+>', r'<\1>', desc)
|
||||||
@ -160,7 +160,7 @@ def get_basic_data(browser, log, *skus):
|
|||||||
tags = []
|
tags = []
|
||||||
rating = 0
|
rating = 0
|
||||||
for bar in row.xpath('descendant::*[contains(@class, "bgdColorCommunity")]/@style'):
|
for bar in row.xpath('descendant::*[contains(@class, "bgdColorCommunity")]/@style'):
|
||||||
m = re.search('width: (\d+)px;.*max-width: (\d+)px', bar)
|
m = re.search(r'width: (\d+)px;.*max-width: (\d+)px', bar)
|
||||||
if m is not None:
|
if m is not None:
|
||||||
rating = float(m.group(1)) / float(m.group(2))
|
rating = float(m.group(1)) / float(m.group(2))
|
||||||
break
|
break
|
||||||
@ -283,7 +283,7 @@ class Edelweiss(Source):
|
|||||||
except Exception as e:
|
except Exception as e:
|
||||||
log.exception('Failed to make identify query: %r'%query)
|
log.exception('Failed to make identify query: %r'%query)
|
||||||
return as_unicode(e)
|
return as_unicode(e)
|
||||||
items = re.search('window[.]items\s*=\s*(.+?);', raw)
|
items = re.search(r'window[.]items\s*=\s*(.+?);', raw)
|
||||||
if items is None:
|
if items is None:
|
||||||
log.error('Failed to get list of matching items')
|
log.error('Failed to get list of matching items')
|
||||||
log.debug('Response text:')
|
log.debug('Response text:')
|
||||||
|
@ -214,7 +214,7 @@ class GoogleBooks(Source):
|
|||||||
if author_tokens:
|
if author_tokens:
|
||||||
q += ('+' if q else '') + build_term('author', author_tokens)
|
q += ('+' if q else '') + build_term('author', author_tokens)
|
||||||
|
|
||||||
if isinstance(q, unicode):
|
if isinstance(q, type(u'')):
|
||||||
q = q.encode('utf-8')
|
q = q.encode('utf-8')
|
||||||
if not q:
|
if not q:
|
||||||
return None
|
return None
|
||||||
|
@ -471,7 +471,7 @@ def identify(log, abort, # {{{
|
|||||||
for r in presults:
|
for r in presults:
|
||||||
log('\n\n---')
|
log('\n\n---')
|
||||||
try:
|
try:
|
||||||
log(unicode(r))
|
log(type(u'')(r))
|
||||||
except TypeError:
|
except TypeError:
|
||||||
log(repr(r))
|
log(repr(r))
|
||||||
if plog:
|
if plog:
|
||||||
|
@ -233,7 +233,7 @@ class OverDrive(Source):
|
|||||||
xreq.add_header('Referer', q_init_search)
|
xreq.add_header('Referer', q_init_search)
|
||||||
xreq.add_header('Accept', 'application/json, text/javascript, */*')
|
xreq.add_header('Accept', 'application/json, text/javascript, */*')
|
||||||
raw = br.open_novisit(xreq).read()
|
raw = br.open_novisit(xreq).read()
|
||||||
for m in re.finditer(unicode(r'"iTotalDisplayRecords":(?P<displayrecords>\d+).*?"iTotalRecords":(?P<totalrecords>\d+)'), raw):
|
for m in re.finditer(type(u'')(r'"iTotalDisplayRecords":(?P<displayrecords>\d+).*?"iTotalRecords":(?P<totalrecords>\d+)'), raw):
|
||||||
if int(m.group('totalrecords')) == 0:
|
if int(m.group('totalrecords')) == 0:
|
||||||
return ''
|
return ''
|
||||||
elif int(m.group('displayrecords')) >= 1:
|
elif int(m.group('displayrecords')) >= 1:
|
||||||
@ -450,7 +450,7 @@ class OverDrive(Source):
|
|||||||
|
|
||||||
if desc:
|
if desc:
|
||||||
desc = desc[0]
|
desc = desc[0]
|
||||||
desc = html.tostring(desc, method='html', encoding=unicode).strip()
|
desc = html.tostring(desc, method='html', encoding='unicode').strip()
|
||||||
# remove all attributes from tags
|
# remove all attributes from tags
|
||||||
desc = re.sub(r'<([a-zA-Z0-9]+)\s[^>]+>', r'<\1>', desc)
|
desc = re.sub(r'<([a-zA-Z0-9]+)\s[^>]+>', r'<\1>', desc)
|
||||||
# Remove comments
|
# Remove comments
|
||||||
|
@ -100,7 +100,7 @@ class Ozon(Source):
|
|||||||
qItems.discard('')
|
qItems.discard('')
|
||||||
searchText = u' '.join(qItems).strip()
|
searchText = u' '.join(qItems).strip()
|
||||||
|
|
||||||
if isinstance(searchText, unicode):
|
if isinstance(searchText, type(u'')):
|
||||||
searchText = searchText.encode('utf-8')
|
searchText = searchText.encode('utf-8')
|
||||||
if not searchText:
|
if not searchText:
|
||||||
return None
|
return None
|
||||||
@ -148,7 +148,7 @@ class Ozon(Source):
|
|||||||
else:
|
else:
|
||||||
# Redirect page: trying to extract ozon_id from javascript data
|
# Redirect page: trying to extract ozon_id from javascript data
|
||||||
h = HTMLParser()
|
h = HTMLParser()
|
||||||
entry_string = (h.unescape(etree.tostring(doc, pretty_print=True, encoding=unicode)))
|
entry_string = (h.unescape(etree.tostring(doc, pretty_print=True, encoding='unicode')))
|
||||||
json_pat = re.compile(r'dataLayer\s*=\s*(.+)?;')
|
json_pat = re.compile(r'dataLayer\s*=\s*(.+)?;')
|
||||||
json_info = re.search(json_pat, entry_string)
|
json_info = re.search(json_pat, entry_string)
|
||||||
jsondata = json_info.group(1) if json_info else None
|
jsondata = json_info.group(1) if json_info else None
|
||||||
@ -198,16 +198,16 @@ class Ozon(Source):
|
|||||||
|
|
||||||
reRemoveFromTitle = re.compile(r'[?!:.,;+-/&%"\'=]')
|
reRemoveFromTitle = re.compile(r'[?!:.,;+-/&%"\'=]')
|
||||||
|
|
||||||
title = unicode(title).upper() if title else ''
|
title = type(u'')(title).upper() if title else ''
|
||||||
if reRemoveFromTitle:
|
if reRemoveFromTitle:
|
||||||
title = reRemoveFromTitle.sub('', title)
|
title = reRemoveFromTitle.sub('', title)
|
||||||
authors = map(_normalizeAuthorNameWithInitials,
|
authors = map(_normalizeAuthorNameWithInitials,
|
||||||
map(unicode.upper, map(unicode, authors))) if authors else None
|
map(type(u'').upper, map(type(u''), authors))) if authors else None
|
||||||
|
|
||||||
ozon_id = identifiers.get('ozon', None)
|
ozon_id = identifiers.get('ozon', None)
|
||||||
# log.debug(u'ozonid: ', ozon_id)
|
# log.debug(u'ozonid: ', ozon_id)
|
||||||
|
|
||||||
unk = unicode(_('Unknown')).upper()
|
unk = type(u'')(_('Unknown')).upper()
|
||||||
|
|
||||||
if title == unk:
|
if title == unk:
|
||||||
title = None
|
title = None
|
||||||
@ -226,7 +226,7 @@ class Ozon(Source):
|
|||||||
def calc_source_relevance(mi): # {{{
|
def calc_source_relevance(mi): # {{{
|
||||||
relevance = 0
|
relevance = 0
|
||||||
if title:
|
if title:
|
||||||
mititle = unicode(mi.title).upper() if mi.title else ''
|
mititle = type(u'')(mi.title).upper() if mi.title else ''
|
||||||
|
|
||||||
if reRemoveFromTitle:
|
if reRemoveFromTitle:
|
||||||
mititle = reRemoveFromTitle.sub('', mititle)
|
mititle = reRemoveFromTitle.sub('', mititle)
|
||||||
@ -240,7 +240,7 @@ class Ozon(Source):
|
|||||||
relevance += 1
|
relevance += 1
|
||||||
|
|
||||||
if authors:
|
if authors:
|
||||||
miauthors = map(unicode.upper, map(unicode, mi.authors)) if mi.authors else []
|
miauthors = map(type(u'').upper, map(type(u''), mi.authors)) if mi.authors else []
|
||||||
# log.debug('Authors %s vs miauthors %s'%(','.join(authors), ','.join(miauthors)))
|
# log.debug('Authors %s vs miauthors %s'%(','.join(authors), ','.join(miauthors)))
|
||||||
|
|
||||||
if (in_authors(authors, miauthors)):
|
if (in_authors(authors, miauthors)):
|
||||||
@ -320,13 +320,13 @@ class Ozon(Source):
|
|||||||
# }}}
|
# }}}
|
||||||
|
|
||||||
def to_metadata(self, log, entry): # {{{
|
def to_metadata(self, log, entry): # {{{
|
||||||
title = unicode(entry.xpath(u'normalize-space(.//div[@itemprop="name"][1]/text())'))
|
title = type(u'')(entry.xpath(u'normalize-space(.//div[@itemprop="name"][1]/text())'))
|
||||||
# log.debug(u'Title: -----> %s' % title)
|
# log.debug(u'Title: -----> %s' % title)
|
||||||
|
|
||||||
author = unicode(entry.xpath(u'normalize-space(.//div[contains(@class, "mPerson")])'))
|
author = type(u'')(entry.xpath(u'normalize-space(.//div[contains(@class, "mPerson")])'))
|
||||||
# log.debug(u'Author: -----> %s' % author)
|
# log.debug(u'Author: -----> %s' % author)
|
||||||
|
|
||||||
norm_authors = map(_normalizeAuthorNameWithInitials, map(unicode.strip, unicode(author).split(u',')))
|
norm_authors = map(_normalizeAuthorNameWithInitials, map(type(u'').strip, type(u'')(author).split(u',')))
|
||||||
mi = Metadata(title, norm_authors)
|
mi = Metadata(title, norm_authors)
|
||||||
|
|
||||||
ozon_id = entry.get('data-href').split('/')[-2]
|
ozon_id = entry.get('data-href').split('/')[-2]
|
||||||
@ -524,7 +524,7 @@ class Ozon(Source):
|
|||||||
# comments, from Javascript data
|
# comments, from Javascript data
|
||||||
beginning = fullString.find(u'FirstBlock')
|
beginning = fullString.find(u'FirstBlock')
|
||||||
end = fullString.find(u'}', beginning)
|
end = fullString.find(u'}', beginning)
|
||||||
comments = unicode(fullString[beginning + 75:end - 1]).decode("unicode-escape")
|
comments = type(u'')(fullString[beginning + 75:end - 1]).decode("unicode-escape")
|
||||||
metadata.comments = replace_entities(comments, 'utf-8')
|
metadata.comments = replace_entities(comments, 'utf-8')
|
||||||
# }}}
|
# }}}
|
||||||
|
|
||||||
@ -603,7 +603,7 @@ def _format_isbn(log, isbn): # {{{
|
|||||||
|
|
||||||
|
|
||||||
def _translageLanguageToCode(displayLang): # {{{
|
def _translageLanguageToCode(displayLang): # {{{
|
||||||
displayLang = unicode(displayLang).strip() if displayLang else None
|
displayLang = type(u'')(displayLang).strip() if displayLang else None
|
||||||
langTbl = {None: 'ru',
|
langTbl = {None: 'ru',
|
||||||
u'Русский': 'ru',
|
u'Русский': 'ru',
|
||||||
u'Немецкий': 'de',
|
u'Немецкий': 'de',
|
||||||
@ -627,9 +627,9 @@ def _normalizeAuthorNameWithInitials(name): # {{{
|
|||||||
if name:
|
if name:
|
||||||
re1 = r'^(?P<lname>\S+)\s+(?P<fname>[^\d\W]\.)(?:\s*(?P<mname>[^\d\W]\.))?$'
|
re1 = r'^(?P<lname>\S+)\s+(?P<fname>[^\d\W]\.)(?:\s*(?P<mname>[^\d\W]\.))?$'
|
||||||
re2 = r'^(?P<fname>[^\d\W]\.)(?:\s*(?P<mname>[^\d\W]\.))?\s+(?P<lname>\S+)$'
|
re2 = r'^(?P<fname>[^\d\W]\.)(?:\s*(?P<mname>[^\d\W]\.))?\s+(?P<lname>\S+)$'
|
||||||
matcher = re.match(re1, unicode(name), re.UNICODE)
|
matcher = re.match(re1, type(u'')(name), re.UNICODE)
|
||||||
if not matcher:
|
if not matcher:
|
||||||
matcher = re.match(re2, unicode(name), re.UNICODE)
|
matcher = re.match(re2, type(u'')(name), re.UNICODE)
|
||||||
|
|
||||||
if matcher:
|
if matcher:
|
||||||
d = matcher.groupdict()
|
d = matcher.groupdict()
|
||||||
@ -653,7 +653,7 @@ def toPubdate(log, yearAsString): # {{{
|
|||||||
# }}}
|
# }}}
|
||||||
|
|
||||||
def _listToUnicodePrintStr(lst): # {{{
|
def _listToUnicodePrintStr(lst): # {{{
|
||||||
return u'[' + u', '.join(unicode(x) for x in lst) + u']'
|
return u'[' + u', '.join(type(u'')(x) for x in lst) + u']'
|
||||||
|
|
||||||
|
|
||||||
# }}}
|
# }}}
|
||||||
|
@ -26,7 +26,7 @@ Result = namedtuple('Result', 'url title cached_url')
|
|||||||
|
|
||||||
|
|
||||||
def tostring(elem):
|
def tostring(elem):
|
||||||
return etree.tostring(elem, encoding=unicode, method='text', with_tail=False)
|
return etree.tostring(elem, encoding='unicode', method='text', with_tail=False)
|
||||||
|
|
||||||
|
|
||||||
def browser():
|
def browser():
|
||||||
|
@ -128,11 +128,11 @@ class Textile(object):
|
|||||||
|
|
||||||
pnct = r'[-!"#$%&()*+,/:;<=>?@\'\[\\\]\.^_`{|}~]'
|
pnct = r'[-!"#$%&()*+,/:;<=>?@\'\[\\\]\.^_`{|}~]'
|
||||||
# urlch = r'[\w"$\-_.+!*\'(),";/?:@=&%#{}|\\^~\[\]`]'
|
# urlch = r'[\w"$\-_.+!*\'(),";/?:@=&%#{}|\\^~\[\]`]'
|
||||||
urlch = '[\w"$\-_.+*\'(),";\/?:@=&%#{}|\\^~\[\]`]'
|
urlch = r'[\w"$\-_.+*\'(),";\/?:@=&%#{}|\\^~\[\]`]'
|
||||||
|
|
||||||
url_schemes = ('http', 'https', 'ftp', 'mailto')
|
url_schemes = ('http', 'https', 'ftp', 'mailto')
|
||||||
|
|
||||||
btag = ('bq', 'bc', 'notextile', 'pre', 'h[1-6]', 'fn\d+', 'p')
|
btag = ('bq', 'bc', 'notextile', 'pre', 'h[1-6]', r'fn\d+', 'p')
|
||||||
btag_lite = ('bq', 'bc', 'p')
|
btag_lite = ('bq', 'bc', 'p')
|
||||||
|
|
||||||
macro_defaults = [
|
macro_defaults = [
|
||||||
@ -292,7 +292,7 @@ class Textile(object):
|
|||||||
"""
|
"""
|
||||||
self.html_type = html_type
|
self.html_type = html_type
|
||||||
|
|
||||||
# text = unicode(text)
|
# text = type(u'')(text)
|
||||||
text = _normalize_newlines(text)
|
text = _normalize_newlines(text)
|
||||||
|
|
||||||
if self.restricted:
|
if self.restricted:
|
||||||
|
@ -21,7 +21,9 @@ class BiblioStore(BasicStoreConfig, OpenSearchOPDSStore):
|
|||||||
|
|
||||||
def search(self, query, max_results=10, timeout=60):
|
def search(self, query, max_results=10, timeout=60):
|
||||||
# check for cyrillic symbols before performing search
|
# check for cyrillic symbols before performing search
|
||||||
uquery = unicode(query.strip(), 'utf-8')
|
if isinstance(query, bytes):
|
||||||
|
query = query.decode('utf-8')
|
||||||
|
uquery = query.strip()
|
||||||
reObj = re.search(u'^[а-яА-Я\\d\\s]{3,}$', uquery)
|
reObj = re.search(u'^[а-яА-Я\\d\\s]{3,}$', uquery)
|
||||||
if not reObj:
|
if not reObj:
|
||||||
return
|
return
|
||||||
|
@ -43,7 +43,9 @@ class ChitankaStore(BasicStoreConfig, StorePlugin):
|
|||||||
|
|
||||||
def search(self, query, max_results=10, timeout=60):
|
def search(self, query, max_results=10, timeout=60):
|
||||||
# check for cyrillic symbols before performing search
|
# check for cyrillic symbols before performing search
|
||||||
uquery = unicode(query.strip(), 'utf-8')
|
if isinstance(query, bytes):
|
||||||
|
query = query.decode('utf-8')
|
||||||
|
uquery = query.strip()
|
||||||
reObj = re.search(u'^[а-яА-Я\\d\\s]{3,}$', uquery)
|
reObj = re.search(u'^[а-яА-Я\\d\\s]{3,}$', uquery)
|
||||||
if not reObj:
|
if not reObj:
|
||||||
return
|
return
|
||||||
@ -56,7 +58,7 @@ class ChitankaStore(BasicStoreConfig, StorePlugin):
|
|||||||
br = browser()
|
br = browser()
|
||||||
try:
|
try:
|
||||||
with closing(br.open(url, timeout=timeout)) as f:
|
with closing(br.open(url, timeout=timeout)) as f:
|
||||||
f = unicode(f.read(), 'utf-8')
|
f = f.read().decode('utf-8')
|
||||||
doc = html.fromstring(f)
|
doc = html.fromstring(f)
|
||||||
|
|
||||||
for data in doc.xpath('//ul[@class="superlist booklist"]/li'):
|
for data in doc.xpath('//ul[@class="superlist booklist"]/li'):
|
||||||
@ -98,7 +100,7 @@ class ChitankaStore(BasicStoreConfig, StorePlugin):
|
|||||||
with closing(br2.open(base_url + author_url, timeout=timeout)) as f:
|
with closing(br2.open(base_url + author_url, timeout=timeout)) as f:
|
||||||
if counter <= 0:
|
if counter <= 0:
|
||||||
break
|
break
|
||||||
f = unicode(f.read(), 'utf-8')
|
f = f.read().decode('utf-8')
|
||||||
doc2 = html.fromstring(f)
|
doc2 = html.fromstring(f)
|
||||||
|
|
||||||
# search for book title
|
# search for book title
|
||||||
|
@ -22,10 +22,9 @@ class EbooksGratuitsStore(BasicStoreConfig, OpenSearchOPDSStore):
|
|||||||
return ascii_text(s)
|
return ascii_text(s)
|
||||||
|
|
||||||
def search(self, query, max_results=10, timeout=60):
|
def search(self, query, max_results=10, timeout=60):
|
||||||
query = self.strip_accents(unicode(query))
|
query = self.strip_accents(type(u'')(query))
|
||||||
for s in OpenSearchOPDSStore.search(self, query, max_results, timeout):
|
for s in OpenSearchOPDSStore.search(self, query, max_results, timeout):
|
||||||
if s.downloads:
|
if s.downloads:
|
||||||
s.drm = SearchResult.DRM_UNLOCKED
|
s.drm = SearchResult.DRM_UNLOCKED
|
||||||
s.price = '$0.00'
|
s.price = '$0.00'
|
||||||
yield s
|
yield s
|
||||||
|
|
||||||
|
@ -49,7 +49,7 @@ class eKnigiStore(BasicStoreConfig, StorePlugin):
|
|||||||
|
|
||||||
def search(self, query, max_results=10, timeout=60):
|
def search(self, query, max_results=10, timeout=60):
|
||||||
# check for cyrillic symbols before performing search
|
# check for cyrillic symbols before performing search
|
||||||
uquery = unicode(query.strip(), 'utf-8')
|
uquery = type(u'')(query.strip(), 'utf-8')
|
||||||
reObj = re.search(u'^[а-яА-Я\\d\\s]{2,}$', uquery)
|
reObj = re.search(u'^[а-яА-Я\\d\\s]{2,}$', uquery)
|
||||||
if not reObj:
|
if not reObj:
|
||||||
return
|
return
|
||||||
|
@ -46,7 +46,7 @@ def search_kobo(query, max_results=10, timeout=60, write_html_to=None):
|
|||||||
cover_url = None
|
cover_url = None
|
||||||
|
|
||||||
for p in select('p.title', item):
|
for p in select('p.title', item):
|
||||||
title = etree.tostring(p, method='text', encoding=unicode).strip()
|
title = etree.tostring(p, method='text', encoding='unicode').strip()
|
||||||
for a in select('a[href]', p):
|
for a in select('a[href]', p):
|
||||||
url = a.get('href')
|
url = a.get('href')
|
||||||
break
|
break
|
||||||
@ -58,11 +58,11 @@ def search_kobo(query, max_results=10, timeout=60, write_html_to=None):
|
|||||||
|
|
||||||
authors = []
|
authors = []
|
||||||
for a in select('p.contributor-list a.contributor-name', item):
|
for a in select('p.contributor-list a.contributor-name', item):
|
||||||
authors.append(etree.tostring(a, method='text', encoding=unicode).strip())
|
authors.append(etree.tostring(a, method='text', encoding='unicode').strip())
|
||||||
authors = authors_to_string(authors)
|
authors = authors_to_string(authors)
|
||||||
|
|
||||||
for p in select('p.price', item):
|
for p in select('p.price', item):
|
||||||
price = etree.tostring(p, method='text', encoding=unicode).strip()
|
price = etree.tostring(p, method='text', encoding='unicode').strip()
|
||||||
break
|
break
|
||||||
else:
|
else:
|
||||||
price = None
|
price = None
|
||||||
|
@ -88,7 +88,7 @@ class LitResStore(BasicStoreConfig, StorePlugin):
|
|||||||
authors = data.xpath('.//title-info/author/first-name/text()|'
|
authors = data.xpath('.//title-info/author/first-name/text()|'
|
||||||
'.//title-info/author/middle-name/text()|'
|
'.//title-info/author/middle-name/text()|'
|
||||||
'.//title-info/author/last-name/text()')
|
'.//title-info/author/last-name/text()')
|
||||||
sRes.author = u' '.join(map(unicode, authors))
|
sRes.author = u' '.join(map(type(u''), authors))
|
||||||
sRes.price = data.xpath(xp_template.format('price'))
|
sRes.price = data.xpath(xp_template.format('price'))
|
||||||
# cover vs cover_preview
|
# cover vs cover_preview
|
||||||
sRes.cover_url = data.xpath(xp_template.format('cover_preview'))
|
sRes.cover_url = data.xpath(xp_template.format('cover_preview'))
|
||||||
@ -107,7 +107,7 @@ def format_price_in_RUR(price):
|
|||||||
@return: formatted price if possible otherwise original value
|
@return: formatted price if possible otherwise original value
|
||||||
@rtype: unicode
|
@rtype: unicode
|
||||||
'''
|
'''
|
||||||
if price and re.match("^\d*?\.\d*?$", price):
|
if price and re.match(r"^\d*?\.\d*?$", price):
|
||||||
try:
|
try:
|
||||||
price = u'{:,.2F} руб.'.format(float(price))
|
price = u'{:,.2F} руб.'.format(float(price))
|
||||||
price = price.replace(',', ' ').replace('.', ',', 1)
|
price = price.replace(',', ' ').replace('.', ',', 1)
|
||||||
|
@ -67,7 +67,7 @@ class AdvSearchBuilderDialog(QDialog, Ui_Dialog):
|
|||||||
self.mc = '='
|
self.mc = '='
|
||||||
else:
|
else:
|
||||||
self.mc = '~'
|
self.mc = '~'
|
||||||
all, any, phrase, none = map(lambda x: unicode(x.text()),
|
all, any, phrase, none = map(lambda x: type(u'')(x.text()),
|
||||||
(self.all, self.any, self.phrase, self.none))
|
(self.all, self.any, self.phrase, self.none))
|
||||||
all, any, none = map(self.tokens, (all, any, none))
|
all, any, none = map(self.tokens, (all, any, none))
|
||||||
phrase = phrase.strip()
|
phrase = phrase.strip()
|
||||||
@ -86,11 +86,11 @@ class AdvSearchBuilderDialog(QDialog, Ui_Dialog):
|
|||||||
return ans
|
return ans
|
||||||
|
|
||||||
def token(self):
|
def token(self):
|
||||||
txt = unicode(self.text.text()).strip()
|
txt = type(u'')(self.text.text()).strip()
|
||||||
if txt:
|
if txt:
|
||||||
if self.negate.isChecked():
|
if self.negate.isChecked():
|
||||||
txt = '!'+txt
|
txt = '!'+txt
|
||||||
tok = self.FIELDS[unicode(self.field.currentText())]+txt
|
tok = self.FIELDS[type(u'')(self.field.currentText())]+txt
|
||||||
if re.search(r'\s', tok):
|
if re.search(r'\s', tok):
|
||||||
tok = '"%s"'%tok
|
tok = '"%s"'%tok
|
||||||
return tok
|
return tok
|
||||||
@ -106,13 +106,13 @@ class AdvSearchBuilderDialog(QDialog, Ui_Dialog):
|
|||||||
|
|
||||||
ans = []
|
ans = []
|
||||||
self.box_last_values = {}
|
self.box_last_values = {}
|
||||||
title = unicode(self.title_box.text()).strip()
|
title = type(u'')(self.title_box.text()).strip()
|
||||||
if title:
|
if title:
|
||||||
ans.append('title:"' + self.mc + title + '"')
|
ans.append('title:"' + self.mc + title + '"')
|
||||||
author = unicode(self.author_box.text()).strip()
|
author = type(u'')(self.author_box.text()).strip()
|
||||||
if author:
|
if author:
|
||||||
ans.append('author:"' + self.mc + author + '"')
|
ans.append('author:"' + self.mc + author + '"')
|
||||||
format = unicode(self.format_box.text()).strip()
|
format = type(u'')(self.format_box.text()).strip()
|
||||||
if format:
|
if format:
|
||||||
ans.append('format:"' + self.mc + format + '"')
|
ans.append('format:"' + self.mc + format + '"')
|
||||||
if ans:
|
if ans:
|
||||||
|
@ -22,7 +22,7 @@ class CacheUpdateThread(Thread, QObject):
|
|||||||
|
|
||||||
total_changed = pyqtSignal(int)
|
total_changed = pyqtSignal(int)
|
||||||
update_progress = pyqtSignal(int)
|
update_progress = pyqtSignal(int)
|
||||||
update_details = pyqtSignal(unicode)
|
update_details = pyqtSignal(type(u''))
|
||||||
|
|
||||||
def __init__(self, config, seralize_books_function, timeout):
|
def __init__(self, config, seralize_books_function, timeout):
|
||||||
Thread.__init__(self)
|
Thread.__init__(self)
|
||||||
|
@ -105,7 +105,7 @@ class BooksModel(QAbstractItemModel):
|
|||||||
return
|
return
|
||||||
descending = order == Qt.DescendingOrder
|
descending = order == Qt.DescendingOrder
|
||||||
self.books.sort(None,
|
self.books.sort(None,
|
||||||
lambda x: sort_key(unicode(self.data_as_text(x, col))),
|
lambda x: sort_key(type(u'')(self.data_as_text(x, col))),
|
||||||
descending)
|
descending)
|
||||||
if reset:
|
if reset:
|
||||||
self.beginResetModel(), self.endResetModel()
|
self.beginResetModel(), self.endResetModel()
|
||||||
|
@ -40,7 +40,7 @@ class MobileReadStoreDialog(QDialog, Ui_Dialog):
|
|||||||
self.restore_state()
|
self.restore_state()
|
||||||
|
|
||||||
def do_search(self):
|
def do_search(self):
|
||||||
self.results_view.model().search(unicode(self.search_query.text()))
|
self.results_view.model().search(type(u'')(self.search_query.text()))
|
||||||
|
|
||||||
def open_store(self, index):
|
def open_store(self, index):
|
||||||
result = self.results_view.model().get_book(index)
|
result = self.results_view.model().get_book(index)
|
||||||
|
@ -6,9 +6,13 @@ from __future__ import absolute_import, division, print_function, unicode_litera
|
|||||||
|
|
||||||
import sys
|
import sys
|
||||||
|
|
||||||
from calibre.constants import iswindows, preferred_encoding
|
from calibre.constants import iswindows, preferred_encoding, ispy3
|
||||||
|
|
||||||
|
|
||||||
|
if ispy3:
|
||||||
|
from getpass import getpass
|
||||||
|
getpass
|
||||||
|
else:
|
||||||
def getpass(prompt):
|
def getpass(prompt):
|
||||||
if iswindows:
|
if iswindows:
|
||||||
# getpass is broken on windows with python 2.x and unicode, the
|
# getpass is broken on windows with python 2.x and unicode, the
|
||||||
|
@ -9,7 +9,7 @@ from calibre.web.feeds.news import (BasicNewsRecipe, CustomIndexRecipe,
|
|||||||
AutomaticNewsRecipe, CalibrePeriodical)
|
AutomaticNewsRecipe, CalibrePeriodical)
|
||||||
from calibre.ebooks.BeautifulSoup import BeautifulSoup
|
from calibre.ebooks.BeautifulSoup import BeautifulSoup
|
||||||
from calibre.utils.config import JSONConfig
|
from calibre.utils.config import JSONConfig
|
||||||
from polyglot.builtins import unicode_type
|
from polyglot.builtins import unicode_type, codepoint_to_chr
|
||||||
|
|
||||||
basic_recipes = (BasicNewsRecipe, AutomaticNewsRecipe, CustomIndexRecipe,
|
basic_recipes = (BasicNewsRecipe, AutomaticNewsRecipe, CustomIndexRecipe,
|
||||||
CalibrePeriodical)
|
CalibrePeriodical)
|
||||||
@ -44,7 +44,9 @@ def compile_recipe(src):
|
|||||||
'BasicNewsRecipe':BasicNewsRecipe,
|
'BasicNewsRecipe':BasicNewsRecipe,
|
||||||
'AutomaticNewsRecipe':AutomaticNewsRecipe,
|
'AutomaticNewsRecipe':AutomaticNewsRecipe,
|
||||||
'time':time, 're':re,
|
'time':time, 're':re,
|
||||||
'BeautifulSoup':BeautifulSoup
|
'BeautifulSoup':BeautifulSoup,
|
||||||
|
'unicode': unicode_type,
|
||||||
|
'unichr': codepoint_to_chr,
|
||||||
}
|
}
|
||||||
exec(src, namespace)
|
exec(src, namespace)
|
||||||
|
|
||||||
|
@ -15,20 +15,17 @@ import operator
|
|||||||
import string
|
import string
|
||||||
|
|
||||||
from css_selectors.errors import SelectorSyntaxError, ExpressionError
|
from css_selectors.errors import SelectorSyntaxError, ExpressionError
|
||||||
|
from polyglot.builtins import unicode_type, codepoint_to_chr
|
||||||
|
|
||||||
if sys.version_info[0] < 3:
|
|
||||||
_unicode = unicode
|
|
||||||
_unichr = unichr
|
|
||||||
else:
|
|
||||||
_unicode = str
|
|
||||||
_unichr = chr
|
|
||||||
|
|
||||||
tab = string.maketrans(string.ascii_uppercase, string.ascii_lowercase)
|
tab = string.maketrans(string.ascii_uppercase, string.ascii_lowercase)
|
||||||
utab = {c:c+32 for c in range(ord('A'), ord('Z')+1)}
|
utab = {c:c+32 for c in range(ord('A'), ord('Z')+1)}
|
||||||
|
|
||||||
|
|
||||||
def ascii_lower(string):
|
def ascii_lower(string):
|
||||||
"""Lower-case, but only in the ASCII range."""
|
"""Lower-case, but only in the ASCII range."""
|
||||||
return string.translate(utab if isinstance(string, _unicode) else tab)
|
return string.translate(utab if isinstance(string, unicode_type) else tab)
|
||||||
|
|
||||||
|
|
||||||
def urepr(x):
|
def urepr(x):
|
||||||
if isinstance(x, list):
|
if isinstance(x, list):
|
||||||
@ -38,6 +35,7 @@ def urepr(x):
|
|||||||
ans = ans[1:]
|
ans = ans[1:]
|
||||||
return ans
|
return ans
|
||||||
|
|
||||||
|
|
||||||
# Parsed objects
|
# Parsed objects
|
||||||
|
|
||||||
class Selector(object):
|
class Selector(object):
|
||||||
@ -385,6 +383,7 @@ def parse_selector_group(stream):
|
|||||||
else:
|
else:
|
||||||
break
|
break
|
||||||
|
|
||||||
|
|
||||||
def parse_selector(stream):
|
def parse_selector(stream):
|
||||||
result, pseudo_element = parse_simple_selector(stream)
|
result, pseudo_element = parse_simple_selector(stream)
|
||||||
while 1:
|
while 1:
|
||||||
@ -461,7 +460,7 @@ def parse_simple_selector(stream, inside_negation=False):
|
|||||||
'before', 'after'):
|
'before', 'after'):
|
||||||
# Special case: CSS 2.1 pseudo-elements can have a single ':'
|
# Special case: CSS 2.1 pseudo-elements can have a single ':'
|
||||||
# Any new pseudo-element must have two.
|
# Any new pseudo-element must have two.
|
||||||
pseudo_element = _unicode(ident)
|
pseudo_element = unicode_type(ident)
|
||||||
continue
|
continue
|
||||||
if stream.peek() != ('DELIM', '('):
|
if stream.peek() != ('DELIM', '('):
|
||||||
result = Pseudo(result, ident)
|
result = Pseudo(result, ident)
|
||||||
@ -626,11 +625,13 @@ class TokenMacros:
|
|||||||
nmchar = '[_a-z0-9-]|%s|%s' % (escape, nonascii)
|
nmchar = '[_a-z0-9-]|%s|%s' % (escape, nonascii)
|
||||||
nmstart = '[_a-z]|%s|%s' % (escape, nonascii)
|
nmstart = '[_a-z]|%s|%s' % (escape, nonascii)
|
||||||
|
|
||||||
|
|
||||||
def _compile(pattern):
|
def _compile(pattern):
|
||||||
return re.compile(pattern % vars(TokenMacros), re.IGNORECASE).match
|
return re.compile(pattern % vars(TokenMacros), re.IGNORECASE).match
|
||||||
|
|
||||||
|
|
||||||
_match_whitespace = _compile(r'[ \t\r\n\f]+')
|
_match_whitespace = _compile(r'[ \t\r\n\f]+')
|
||||||
_match_number = _compile('[+-]?(?:[0-9]*\.[0-9]+|[0-9]+)')
|
_match_number = _compile(r'[+-]?(?:[0-9]*\.[0-9]+|[0-9]+)')
|
||||||
_match_hash = _compile('#(?:%(nmchar)s)+')
|
_match_hash = _compile('#(?:%(nmchar)s)+')
|
||||||
_match_ident = _compile('-?(?:%(nmstart)s)(?:%(nmchar)s)*')
|
_match_ident = _compile('-?(?:%(nmstart)s)(?:%(nmchar)s)*')
|
||||||
_match_string_by_quote = {
|
_match_string_by_quote = {
|
||||||
@ -650,11 +651,12 @@ else:
|
|||||||
def _replace_simple(match):
|
def _replace_simple(match):
|
||||||
return match.group(1)
|
return match.group(1)
|
||||||
|
|
||||||
|
|
||||||
def _replace_unicode(match):
|
def _replace_unicode(match):
|
||||||
codepoint = int(match.group(1), 16)
|
codepoint = int(match.group(1), 16)
|
||||||
if codepoint > sys.maxunicode:
|
if codepoint > sys.maxunicode:
|
||||||
codepoint = 0xFFFD
|
codepoint = 0xFFFD
|
||||||
return _unichr(codepoint)
|
return codepoint_to_chr(codepoint)
|
||||||
|
|
||||||
|
|
||||||
def unescape_ident(value):
|
def unescape_ident(value):
|
||||||
|
@ -29,7 +29,7 @@ def make_NCName(arg):
|
|||||||
return arg
|
return arg
|
||||||
|
|
||||||
def cnv_anyURI(attribute, arg, element):
|
def cnv_anyURI(attribute, arg, element):
|
||||||
return unicode(arg)
|
return type(u'')(arg)
|
||||||
|
|
||||||
def cnv_boolean(attribute, arg, element):
|
def cnv_boolean(attribute, arg, element):
|
||||||
if arg.lower() in ("false","no"):
|
if arg.lower() in ("false","no"):
|
||||||
@ -85,13 +85,13 @@ def cnv_family(attribute, arg, element):
|
|||||||
def __save_prefix(attribute, arg, element):
|
def __save_prefix(attribute, arg, element):
|
||||||
prefix = arg.split(':',1)[0]
|
prefix = arg.split(':',1)[0]
|
||||||
if prefix == arg:
|
if prefix == arg:
|
||||||
return unicode(arg)
|
return type(u'')(arg)
|
||||||
namespace = element.get_knownns(prefix)
|
namespace = element.get_knownns(prefix)
|
||||||
if namespace is None:
|
if namespace is None:
|
||||||
#raise ValueError, "'%s' is an unknown prefix" % str(prefix)
|
#raise ValueError, "'%s' is an unknown prefix" % str(prefix)
|
||||||
return unicode(arg)
|
return type(u'')(arg)
|
||||||
p = element.get_nsprefix(namespace)
|
p = element.get_nsprefix(namespace)
|
||||||
return unicode(arg)
|
return type(u'')(arg)
|
||||||
|
|
||||||
def cnv_formula(attribute, arg, element):
|
def cnv_formula(attribute, arg, element):
|
||||||
""" A string containing a formula. Formulas do not have a predefined syntax, but the string should
|
""" A string containing a formula. Formulas do not have a predefined syntax, but the string should
|
||||||
@ -218,7 +218,7 @@ def cnv_positiveInteger(attribute, arg, element):
|
|||||||
return str(arg)
|
return str(arg)
|
||||||
|
|
||||||
def cnv_string(attribute, arg, element):
|
def cnv_string(attribute, arg, element):
|
||||||
return unicode(arg)
|
return type(u'')(arg)
|
||||||
|
|
||||||
def cnv_textnoteclass(attribute, arg, element):
|
def cnv_textnoteclass(attribute, arg, element):
|
||||||
if str(arg) not in ("footnote", "endnote"):
|
if str(arg) not in ("footnote", "endnote"):
|
||||||
@ -1480,5 +1480,4 @@ class AttrConverters:
|
|||||||
conversion = attrconverters.get((attribute, None), None)
|
conversion = attrconverters.get((attribute, None), None)
|
||||||
if conversion is not None:
|
if conversion is not None:
|
||||||
return conversion(attribute, value, element)
|
return conversion(attribute, value, element)
|
||||||
return unicode(value)
|
return type(u'')(value)
|
||||||
|
|
||||||
|
@ -182,7 +182,7 @@ class Node(xml.dom.Node):
|
|||||||
def __unicode__(self):
|
def __unicode__(self):
|
||||||
val = []
|
val = []
|
||||||
for c in self.childNodes:
|
for c in self.childNodes:
|
||||||
val.append(unicode(c))
|
val.append(type(u'')(c))
|
||||||
return u''.join(val)
|
return u''.join(val)
|
||||||
|
|
||||||
defproperty(Node, "firstChild", doc="First child node, or None.")
|
defproperty(Node, "firstChild", doc="First child node, or None.")
|
||||||
@ -253,7 +253,7 @@ class Text(Childless, Node):
|
|||||||
def toXml(self,level,f):
|
def toXml(self,level,f):
|
||||||
""" Write XML in UTF-8 """
|
""" Write XML in UTF-8 """
|
||||||
if self.data:
|
if self.data:
|
||||||
f.write(_escape(unicode(self.data).encode('utf-8')))
|
f.write(_escape(type(u'')(self.data).encode('utf-8')))
|
||||||
|
|
||||||
class CDATASection(Childless, Text):
|
class CDATASection(Childless, Text):
|
||||||
nodeType = Node.CDATA_SECTION_NODE
|
nodeType = Node.CDATA_SECTION_NODE
|
||||||
@ -469,7 +469,7 @@ class Element(Node):
|
|||||||
f.write(' xmlns:' + prefix + '="'+ _escape(str(namespace))+'"')
|
f.write(' xmlns:' + prefix + '="'+ _escape(str(namespace))+'"')
|
||||||
for qname in self.attributes.keys():
|
for qname in self.attributes.keys():
|
||||||
prefix = self.get_nsprefix(qname[0])
|
prefix = self.get_nsprefix(qname[0])
|
||||||
f.write(' '+_escape(str(prefix+':'+qname[1]))+'='+_quoteattr(unicode(self.attributes[qname]).encode('utf-8')))
|
f.write(' '+_escape(str(prefix+':'+qname[1]))+'='+_quoteattr(type(u'')(self.attributes[qname]).encode('utf-8')))
|
||||||
f.write('>')
|
f.write('>')
|
||||||
|
|
||||||
def write_close_tag(self, level, f):
|
def write_close_tag(self, level, f):
|
||||||
@ -483,7 +483,7 @@ class Element(Node):
|
|||||||
f.write(' xmlns:' + prefix + '="'+ _escape(str(namespace))+'"')
|
f.write(' xmlns:' + prefix + '="'+ _escape(str(namespace))+'"')
|
||||||
for qname in self.attributes.keys():
|
for qname in self.attributes.keys():
|
||||||
prefix = self.get_nsprefix(qname[0])
|
prefix = self.get_nsprefix(qname[0])
|
||||||
f.write(' '+_escape(str(prefix+':'+qname[1]))+'='+_quoteattr(unicode(self.attributes[qname]).encode('utf-8')))
|
f.write(' '+_escape(str(prefix+':'+qname[1]))+'='+_quoteattr(type(u'')(self.attributes[qname]).encode('utf-8')))
|
||||||
if self.childNodes:
|
if self.childNodes:
|
||||||
f.write('>')
|
f.write('>')
|
||||||
for element in self.childNodes:
|
for element in self.childNodes:
|
||||||
@ -509,5 +509,3 @@ class Element(Node):
|
|||||||
""" This is a check to see if the object is an instance of a type """
|
""" This is a check to see if the object is an instance of a type """
|
||||||
obj = element(check_grammar=False)
|
obj = element(check_grammar=False)
|
||||||
return self.qname == obj.qname
|
return self.qname == obj.qname
|
||||||
|
|
||||||
|
|
||||||
|
@ -55,6 +55,7 @@ if False: # Added by Kovid
|
|||||||
# character etc. styles Since CSS2 has no scope we use a prefix. (Not elegant)
|
# character etc. styles Since CSS2 has no scope we use a prefix. (Not elegant)
|
||||||
# In ODF a style can have a parent, these parents can be chained.
|
# In ODF a style can have a parent, these parents can be chained.
|
||||||
|
|
||||||
|
|
||||||
class StyleToCSS:
|
class StyleToCSS:
|
||||||
|
|
||||||
""" The purpose of the StyleToCSS class is to contain the rules to convert
|
""" The purpose of the StyleToCSS class is to contain the rules to convert
|
||||||
@ -317,6 +318,7 @@ class TagStack:
|
|||||||
if attr in attrs:
|
if attr in attrs:
|
||||||
return attrs[attr]
|
return attrs[attr]
|
||||||
return None
|
return None
|
||||||
|
|
||||||
def count_tags(self, tag):
|
def count_tags(self, tag):
|
||||||
c = 0
|
c = 0
|
||||||
for ttag, tattrs in self.stack:
|
for ttag, tattrs in self.stack:
|
||||||
@ -324,6 +326,7 @@ class TagStack:
|
|||||||
c = c + 1
|
c = c + 1
|
||||||
return c
|
return c
|
||||||
|
|
||||||
|
|
||||||
special_styles = {
|
special_styles = {
|
||||||
'S-Emphasis':'em',
|
'S-Emphasis':'em',
|
||||||
'S-Citation':'cite',
|
'S-Citation':'cite',
|
||||||
@ -352,6 +355,8 @@ special_styles = {
|
|||||||
# ODFCONTENTHANDLER
|
# ODFCONTENTHANDLER
|
||||||
#
|
#
|
||||||
# -----------------------------------------------------------------------------
|
# -----------------------------------------------------------------------------
|
||||||
|
|
||||||
|
|
||||||
class ODF2XHTML(handler.ContentHandler):
|
class ODF2XHTML(handler.ContentHandler):
|
||||||
|
|
||||||
""" The ODF2XHTML parses an ODF file and produces XHTML"""
|
""" The ODF2XHTML parses an ODF file and produces XHTML"""
|
||||||
@ -625,9 +630,6 @@ class ODF2XHTML(handler.ContentHandler):
|
|||||||
self.anchors[name] = "anchor%d" % (len(self.anchors) + 1)
|
self.anchors[name] = "anchor%d" % (len(self.anchors) + 1)
|
||||||
return self.anchors.get(name)
|
return self.anchors.get(name)
|
||||||
|
|
||||||
|
|
||||||
# --------------------------------------------------
|
|
||||||
|
|
||||||
def purgedata(self):
|
def purgedata(self):
|
||||||
self.data = []
|
self.data = []
|
||||||
|
|
||||||
@ -1457,7 +1459,7 @@ dl.notes dd:last-of-type { page-break-after: avoid }
|
|||||||
# self.writeout( escape(mark) )
|
# self.writeout( escape(mark) )
|
||||||
# Since HTML only knows about endnotes, there is too much risk that the
|
# Since HTML only knows about endnotes, there is too much risk that the
|
||||||
# marker is reused in the source. Therefore we force numeric markers
|
# marker is reused in the source. Therefore we force numeric markers
|
||||||
self.writeout(unicode(self.currentnote))
|
self.writeout(type(u'')(self.currentnote))
|
||||||
self.closetag('a')
|
self.closetag('a')
|
||||||
self.closetag('sup')
|
self.closetag('sup')
|
||||||
|
|
||||||
@ -1566,7 +1568,6 @@ dl.notes dd:last-of-type { page-break-after: avoid }
|
|||||||
self.writedata()
|
self.writedata()
|
||||||
self.purgedata()
|
self.purgedata()
|
||||||
|
|
||||||
|
|
||||||
# -----------------------------------------------------------------------------
|
# -----------------------------------------------------------------------------
|
||||||
#
|
#
|
||||||
# Reading the file
|
# Reading the file
|
||||||
@ -1593,7 +1594,7 @@ dl.notes dd:last-of-type { page-break-after: avoid }
|
|||||||
self._walknode(c)
|
self._walknode(c)
|
||||||
self.endElementNS(node.qname, node.tagName)
|
self.endElementNS(node.qname, node.tagName)
|
||||||
if node.nodeType == Node.TEXT_NODE or node.nodeType == Node.CDATA_SECTION_NODE:
|
if node.nodeType == Node.TEXT_NODE or node.nodeType == Node.CDATA_SECTION_NODE:
|
||||||
self.characters(unicode(node))
|
self.characters(type(u'')(node))
|
||||||
|
|
||||||
def odf2xhtml(self, odffile):
|
def odf2xhtml(self, odffile):
|
||||||
""" Load a file and return the XHTML
|
""" Load a file and return the XHTML
|
||||||
|
@ -8,10 +8,6 @@ __copyright__ = '2014, Kovid Goyal <kovid at kovidgoyal.net>'
|
|||||||
|
|
||||||
import unittest
|
import unittest
|
||||||
|
|
||||||
try:
|
|
||||||
unicode
|
|
||||||
except NameError:
|
|
||||||
unicode = str
|
|
||||||
|
|
||||||
def jsonify(tokens):
|
def jsonify(tokens):
|
||||||
"""Turn tokens into "JSON-compatible" data structures."""
|
"""Turn tokens into "JSON-compatible" data structures."""
|
||||||
@ -24,6 +20,7 @@ def jsonify(tokens):
|
|||||||
else:
|
else:
|
||||||
yield token.type, token.value
|
yield token.type, token.value
|
||||||
|
|
||||||
|
|
||||||
class BaseTest(unittest.TestCase):
|
class BaseTest(unittest.TestCase):
|
||||||
|
|
||||||
longMessage = True
|
longMessage = True
|
||||||
@ -34,10 +31,8 @@ class BaseTest(unittest.TestCase):
|
|||||||
"""Test not complete error messages but only substrings."""
|
"""Test not complete error messages but only substrings."""
|
||||||
self.ae(len(errors), len(expected_errors))
|
self.ae(len(errors), len(expected_errors))
|
||||||
for error, expected in zip(errors, expected_errors):
|
for error, expected in zip(errors, expected_errors):
|
||||||
self.assertIn(expected, unicode(error))
|
self.assertIn(expected, type(u'')(error))
|
||||||
|
|
||||||
def jsonify_declarations(self, rule):
|
def jsonify_declarations(self, rule):
|
||||||
return [(decl.name, list(jsonify(decl.value)))
|
return [(decl.name, list(jsonify(decl.value)))
|
||||||
for decl in rule.declarations]
|
for decl in rule.declarations]
|
||||||
|
|
||||||
|
|
||||||
|
Loading…
x
Reference in New Issue
Block a user