Some more fixes for the unicode type

Now replaced in all dynamically loaded code: recipes, metadata
sources, etc. In the case of recipes, since they get compiled by
calibre, we simply make the unicode/unichr names available in the
compilation namespace; no changes to the actual recipes themselves
are needed.
Kovid Goyal 2019-03-13 12:04:09 +05:30
parent 6ad22b392b
commit 2d21a8efa2
33 changed files with 158 additions and 154 deletions
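The fix relies on a simple cross-version idiom: on Python 2 the expression type(u'') evaluates to the unicode builtin, while on Python 3 it evaluates to str, so checks like isinstance(x, type(u'')) and conversions like type(u'')(x) behave identically on both interpreters. A minimal sketch of the idiom (the unicode_type and codepoint_to_chr names mirror the polyglot.builtins aliases used in the hunks below):

import sys

# type(u'') is `unicode` on Python 2 and `str` on Python 3
unicode_type = type(u'')

# unichr() does not exist on Python 3; chr() covers all code points there
if sys.version_info[0] >= 3:
    codepoint_to_chr = chr
else:
    codepoint_to_chr = unichr  # noqa: F821 -- Python 2 builtin

assert isinstance(u'calibre', unicode_type)
assert codepoint_to_chr(0x2022) == u'\u2022'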

View File

@@ -36,7 +36,7 @@ def merge():
     for child in svg.iterchildren('*'):
         clone_node(child, symbol)
     ans.append(symbol)
-    ans = etree.tostring(ans, encoding=unicode, pretty_print=True, with_tail=False)
+    ans = etree.tostring(ans, encoding='unicode', pretty_print=True, with_tail=False)
     ans = re.sub('<svg[^>]+>', '<svg style="display:none">', ans, count=1)
     return ans
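Note: the encoding='unicode' change above recurs in many hunks below. lxml's etree.tostring accepts the string 'unicode' as the encoding argument to request a text result rather than encoded bytes; the old spelling passed the unicode builtin itself, which no longer exists on Python 3. A minimal sketch:

from lxml import etree

root = etree.fromstring('<a><b>text</b></a>')
# encoding='unicode' makes tostring return text, not bytes
serialized = etree.tostring(root, encoding='unicode')
assert isinstance(serialized, type(u''))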

View File

@@ -168,7 +168,7 @@ def sort_languages(x):
     lc, name = x
     if lc == language:
         return ''
-    return sort_key(unicode(name))
+    return sort_key(type(u'')(name))
 html_context['other_languages'].sort(key=sort_languages)

View File

@@ -198,7 +198,7 @@ def generate_ebook_convert_help(preamble, app):
 def update_cli_doc(name, raw, app):
-    if isinstance(raw, unicode):
+    if isinstance(raw, type(u'')):
         raw = raw.encode('utf-8')
     path = 'generated/%s/%s.rst' % (app.config.language, name)
     old_raw = open(path, 'rb').read() if os.path.exists(path) else ''

View File

@@ -21,6 +21,7 @@ prefs = JSONConfig('plugins/interface_demo')
 # Set defaults
 prefs.defaults['hello_world_msg'] = 'Hello, World!'
 class ConfigWidget(QWidget):
     def __init__(self):
@@ -37,5 +38,4 @@ class ConfigWidget(QWidget):
         self.label.setBuddy(self.msg)
     def save_settings(self):
-        prefs['hello_world_msg'] = unicode(self.msg.text())
+        prefs['hello_world_msg'] = self.msg.text()

View File

@@ -139,7 +139,7 @@ else:
         enc = preferred_encoding
         safe_encode = kwargs.get('safe_encode', False)
         for i, arg in enumerate(args):
-            if isinstance(arg, unicode):
+            if isinstance(arg, type(u'')):
                 try:
                     arg = arg.encode(enc)
                 except UnicodeEncodeError:
@@ -150,8 +150,8 @@ else:
                 try:
                     arg = str(arg)
                 except ValueError:
-                    arg = unicode(arg)
-                if isinstance(arg, unicode):
+                    arg = type(u'')(arg)
+                if isinstance(arg, type(u'')):
                     try:
                         arg = arg.encode(enc)
                     except UnicodeEncodeError:

View File

@@ -1795,41 +1795,41 @@ class UnicodeDammit:
         elif xml_data[:4] == '\x00\x3c\x00\x3f':
             # UTF-16BE
             sniffed_xml_encoding = 'utf-16be'
-            #xml_data = unicode(xml_data, 'utf-16be').encode('utf-8')
+            #xml_data = type(u'')(xml_data, 'utf-16be').encode('utf-8')
         elif (len(xml_data) >= 4) and (xml_data[:2] == '\xfe\xff') \
                 and (xml_data[2:4] != '\x00\x00'):
             # UTF-16BE with BOM
             sniffed_xml_encoding = 'utf-16be'
-            #xml_data = unicode(xml_data[2:], 'utf-16be').encode('utf-8')
+            #xml_data = type(u'')(xml_data[2:], 'utf-16be').encode('utf-8')
         elif xml_data[:4] == '\x3c\x00\x3f\x00':
             # UTF-16LE
             sniffed_xml_encoding = 'utf-16le'
-            #xml_data = unicode(xml_data, 'utf-16le').encode('utf-8')
+            #xml_data = type(u'')(xml_data, 'utf-16le').encode('utf-8')
         elif (len(xml_data) >= 4) and (xml_data[:2] == '\xff\xfe') and \
                 (xml_data[2:4] != '\x00\x00'):
             # UTF-16LE with BOM
             sniffed_xml_encoding = 'utf-16le'
-            #xml_data = unicode(xml_data[2:], 'utf-16le').encode('utf-8')
+            #xml_data = type(u'')(xml_data[2:], 'utf-16le').encode('utf-8')
         elif xml_data[:4] == '\x00\x00\x00\x3c':
             # UTF-32BE
             sniffed_xml_encoding = 'utf-32be'
-            #xml_data = unicode(xml_data, 'utf-32be').encode('utf-8')
+            #xml_data = type(u'')(xml_data, 'utf-32be').encode('utf-8')
         elif xml_data[:4] == '\x3c\x00\x00\x00':
             # UTF-32LE
             sniffed_xml_encoding = 'utf-32le'
-            #xml_data = unicode(xml_data, 'utf-32le').encode('utf-8')
+            #xml_data = type(u'')(xml_data, 'utf-32le').encode('utf-8')
         elif xml_data[:4] == '\x00\x00\xfe\xff':
             # UTF-32BE with BOM
             sniffed_xml_encoding = 'utf-32be'
-            #xml_data = unicode(xml_data[4:], 'utf-32be').encode('utf-8')
+            #xml_data = type(u'')(xml_data[4:], 'utf-32be').encode('utf-8')
         elif xml_data[:4] == '\xff\xfe\x00\x00':
             # UTF-32LE with BOM
             sniffed_xml_encoding = 'utf-32le'
-            #xml_data = unicode(xml_data[4:], 'utf-32le').encode('utf-8')
+            #xml_data = type(u'')(xml_data[4:], 'utf-32le').encode('utf-8')
         elif xml_data[:3] == '\xef\xbb\xbf':
             # UTF-8 with BOM
             sniffed_xml_encoding = 'utf-8'
-            #xml_data = unicode(xml_data[3:], 'utf-8').encode('utf-8')
+            #xml_data = type(u'')(xml_data[3:], 'utf-8').encode('utf-8')
         else:
             sniffed_xml_encoding = 'ascii'
             pass

View File

@@ -93,7 +93,7 @@ def parse_details_page(url, log, timeout, browser, domain):
     errmsg = root.xpath('//*[@id="errorMessage"]')
     if errmsg:
         msg = 'Failed to parse amazon details page: %r' % url
-        msg += tostring(errmsg, method='text', encoding=unicode).strip()
+        msg += tostring(errmsg, method='text', encoding='unicode').strip()
         log.error(msg)
         return
@@ -466,7 +466,7 @@ class Worker(Thread):  # Get details {{{
         self.result_queue.put(mi)
     def totext(self, elem):
-        return self.tostring(elem, encoding=unicode, method='text').strip()
+        return self.tostring(elem, encoding='unicode', method='text').strip()
     def parse_title(self, root):
         h1 = root.xpath('//h1[@id="title"]')
@@ -478,10 +478,10 @@ class Worker(Thread):  # Get details {{{
         tdiv = root.xpath('//h1[contains(@class, "parseasinTitle")]')[0]
         actual_title = tdiv.xpath('descendant::*[@id="btAsinTitle"]')
         if actual_title:
-            title = self.tostring(actual_title[0], encoding=unicode,
+            title = self.tostring(actual_title[0], encoding='unicode',
                                   method='text').strip()
         else:
-            title = self.tostring(tdiv, encoding=unicode,
+            title = self.tostring(tdiv, encoding='unicode',
                                   method='text').strip()
         ans = re.sub(r'[(\[].*[)\]]', '', title).strip()
         if not ans:
@@ -508,7 +508,7 @@ class Worker(Thread):  # Get details {{{
             ''')
         for x in aname:
             x.tail = ''
-        authors = [self.tostring(x, encoding=unicode, method='text').strip() for x
+        authors = [self.tostring(x, encoding='unicode', method='text').strip() for x
                    in aname]
         authors = [a for a in authors if a]
         return authors
@@ -559,7 +559,7 @@ class Worker(Thread):  # Get details {{{
         for a in desc.xpath('descendant::a[@href]'):
             del a.attrib['href']
             a.tag = 'span'
-        desc = self.tostring(desc, method='html', encoding=unicode).strip()
+        desc = self.tostring(desc, method='html', encoding='unicode').strip()
         # Encoding bug in Amazon data U+fffd (replacement char)
         # in some examples it is present in place of '
@@ -626,14 +626,14 @@ class Worker(Thread):  # Get details {{{
             spans = series.xpath('./span')
             if spans:
                 raw = self.tostring(
-                    spans[0], encoding=unicode, method='text', with_tail=False).strip()
+                    spans[0], encoding='unicode', method='text', with_tail=False).strip()
                 m = re.search(r'\s+([0-9.]+)$', raw.strip())
                 if m is not None:
                     series_index = float(m.group(1))
                     s = series.xpath('./a[@id="series-page-link"]')
                     if s:
                         series = self.tostring(
-                            s[0], encoding=unicode, method='text', with_tail=False).strip()
+                            s[0], encoding='unicode', method='text', with_tail=False).strip()
                         if series:
                             ans = (series, series_index)
         # This is found on Kindle edition pages on amazon.com
@@ -646,7 +646,7 @@ class Worker(Thread):  # Get details {{{
                 a = span.xpath('./a[@href]')
                 if a:
                     series = self.tostring(
-                        a[0], encoding=unicode, method='text', with_tail=False).strip()
+                        a[0], encoding='unicode', method='text', with_tail=False).strip()
                     if series:
                         ans = (series, series_index)
         # This is found on newer Kindle edition pages on amazon.com
@@ -659,14 +659,14 @@ class Worker(Thread):  # Get details {{{
                 a = b.getparent().xpath('./a[@href]')
                 if a:
                     series = self.tostring(
-                        a[0], encoding=unicode, method='text', with_tail=False).partition('(')[0].strip()
+                        a[0], encoding='unicode', method='text', with_tail=False).partition('(')[0].strip()
                     if series:
                         ans = series, series_index
         if ans == (None, None):
             desc = root.xpath('//div[@id="ps-content"]/div[@class="buying"]')
             if desc:
-                raw = self.tostring(desc[0], method='text', encoding=unicode)
+                raw = self.tostring(desc[0], method='text', encoding='unicode')
                 raw = re.sub(r'\s+', ' ', raw)
                 match = self.series_pat.search(raw)
                 if match is not None:
@@ -1161,7 +1161,7 @@ class Amazon(Source):
         if not result_links:
             result_links = root.xpath(r'//li[starts-with(@id, "result_")]//a[@href and contains(@class, "s-access-detail-page")]')
         for a in result_links:
-            title = tostring(a, method='text', encoding=unicode)
+            title = tostring(a, method='text', encoding='unicode')
             if title_ok(title):
                 url = a.get('href')
                 if url.startswith('/'):
@@ -1177,7 +1177,7 @@ class Amazon(Source):
                 # New amazon markup
                 links = div.xpath('descendant::h3/a[@href]')
                 for a in links:
-                    title = tostring(a, method='text', encoding=unicode)
+                    title = tostring(a, method='text', encoding='unicode')
                     if title_ok(title):
                         url = a.get('href')
                         if url.startswith('/'):
@@ -1192,7 +1192,7 @@ class Amazon(Source):
             for td in root.xpath(
                     r'//div[@id="Results"]/descendant::td[starts-with(@id, "search:Td:")]'):
                 for a in td.xpath(r'descendant::td[@class="dataColumn"]/descendant::a[@href]/span[@class="srTitle"]/..'):
-                    title = tostring(a, method='text', encoding=unicode)
+                    title = tostring(a, method='text', encoding='unicode')
                     if title_ok(title):
                         url = a.get('href')
                         if url.startswith('/'):

View File

@@ -99,7 +99,7 @@ def main(args=sys.argv):
         log = buf.getvalue()
         result = (metadata_to_opf(result) if opts.opf else
-                  unicode(result).encode('utf-8'))
+                  type(u'')(result).encode('utf-8'))
         if opts.verbose:
             print (log, file=sys.stderr)

View File

@@ -203,7 +203,7 @@ class Douban(Source):
                  build_term('author', author_tokens))
        t = 'search'
        q = q.strip()
-       if isinstance(q, unicode):
+       if isinstance(q, type(u'')):
            q = q.encode('utf-8')
        if not q:
            return None

View File

@@ -31,7 +31,7 @@ def parse_html(raw):
 def astext(node):
     from lxml import etree
-    return etree.tostring(node, method='text', encoding=unicode,
+    return etree.tostring(node, method='text', encoding='unicode',
                           with_tail=False).strip()
@@ -110,7 +110,7 @@ class Worker(Thread):  # {{{
         for a in desc.xpath('descendant::a[@href]'):
             del a.attrib['href']
             a.tag = 'span'
-        desc = etree.tostring(desc, method='html', encoding=unicode).strip()
+        desc = etree.tostring(desc, method='html', encoding='unicode').strip()
         # remove all attributes from tags
         desc = re.sub(r'<([a-zA-Z0-9]+)\s[^>]+>', r'<\1>', desc)
@@ -160,7 +160,7 @@ def get_basic_data(browser, log, *skus):
         tags = []
         rating = 0
         for bar in row.xpath('descendant::*[contains(@class, "bgdColorCommunity")]/@style'):
-            m = re.search('width: (\d+)px;.*max-width: (\d+)px', bar)
+            m = re.search(r'width: (\d+)px;.*max-width: (\d+)px', bar)
             if m is not None:
                 rating = float(m.group(1)) / float(m.group(2))
                 break
@@ -283,7 +283,7 @@ class Edelweiss(Source):
         except Exception as e:
             log.exception('Failed to make identify query: %r'%query)
             return as_unicode(e)
-        items = re.search('window[.]items\s*=\s*(.+?);', raw)
+        items = re.search(r'window[.]items\s*=\s*(.+?);', raw)
         if items is None:
             log.error('Failed to get list of matching items')
             log.debug('Response text:')

View File

@@ -214,7 +214,7 @@ class GoogleBooks(Source):
         if author_tokens:
             q += ('+' if q else '') + build_term('author', author_tokens)
-        if isinstance(q, unicode):
+        if isinstance(q, type(u'')):
             q = q.encode('utf-8')
         if not q:
             return None

View File

@@ -471,7 +471,7 @@ def identify(log, abort,  # {{{
         for r in presults:
             log('\n\n---')
             try:
-                log(unicode(r))
+                log(type(u'')(r))
             except TypeError:
                 log(repr(r))
     if plog:

View File

@@ -233,7 +233,7 @@ class OverDrive(Source):
         xreq.add_header('Referer', q_init_search)
         xreq.add_header('Accept', 'application/json, text/javascript, */*')
         raw = br.open_novisit(xreq).read()
-        for m in re.finditer(unicode(r'"iTotalDisplayRecords":(?P<displayrecords>\d+).*?"iTotalRecords":(?P<totalrecords>\d+)'), raw):
+        for m in re.finditer(type(u'')(r'"iTotalDisplayRecords":(?P<displayrecords>\d+).*?"iTotalRecords":(?P<totalrecords>\d+)'), raw):
             if int(m.group('totalrecords')) == 0:
                 return ''
             elif int(m.group('displayrecords')) >= 1:
@@ -450,7 +450,7 @@ class OverDrive(Source):
         if desc:
             desc = desc[0]
-            desc = html.tostring(desc, method='html', encoding=unicode).strip()
+            desc = html.tostring(desc, method='html', encoding='unicode').strip()
             # remove all attributes from tags
             desc = re.sub(r'<([a-zA-Z0-9]+)\s[^>]+>', r'<\1>', desc)
             # Remove comments

View File

@@ -100,7 +100,7 @@ class Ozon(Source):
         qItems.discard('')
         searchText = u' '.join(qItems).strip()
-        if isinstance(searchText, unicode):
+        if isinstance(searchText, type(u'')):
             searchText = searchText.encode('utf-8')
         if not searchText:
             return None
@@ -148,7 +148,7 @@ class Ozon(Source):
         else:
             # Redirect page: trying to extract ozon_id from javascript data
             h = HTMLParser()
-            entry_string = (h.unescape(etree.tostring(doc, pretty_print=True, encoding=unicode)))
+            entry_string = (h.unescape(etree.tostring(doc, pretty_print=True, encoding='unicode')))
             json_pat = re.compile(r'dataLayer\s*=\s*(.+)?;')
             json_info = re.search(json_pat, entry_string)
             jsondata = json_info.group(1) if json_info else None
@@ -198,16 +198,16 @@ class Ozon(Source):
         reRemoveFromTitle = re.compile(r'[?!:.,;+-/&%"\'=]')
-        title = unicode(title).upper() if title else ''
+        title = type(u'')(title).upper() if title else ''
         if reRemoveFromTitle:
             title = reRemoveFromTitle.sub('', title)
         authors = map(_normalizeAuthorNameWithInitials,
-                      map(unicode.upper, map(unicode, authors))) if authors else None
+                      map(type(u'').upper, map(type(u''), authors))) if authors else None
         ozon_id = identifiers.get('ozon', None)
         # log.debug(u'ozonid: ', ozon_id)
-        unk = unicode(_('Unknown')).upper()
+        unk = type(u'')(_('Unknown')).upper()
         if title == unk:
             title = None
@@ -226,7 +226,7 @@ class Ozon(Source):
         def calc_source_relevance(mi):  # {{{
             relevance = 0
             if title:
-                mititle = unicode(mi.title).upper() if mi.title else ''
+                mititle = type(u'')(mi.title).upper() if mi.title else ''
                 if reRemoveFromTitle:
                     mititle = reRemoveFromTitle.sub('', mititle)
@@ -240,7 +240,7 @@ class Ozon(Source):
                 relevance += 1
             if authors:
-                miauthors = map(unicode.upper, map(unicode, mi.authors)) if mi.authors else []
+                miauthors = map(type(u'').upper, map(type(u''), mi.authors)) if mi.authors else []
                 # log.debug('Authors %s vs miauthors %s'%(','.join(authors), ','.join(miauthors)))
                 if (in_authors(authors, miauthors)):
@@ -320,13 +320,13 @@ class Ozon(Source):
     # }}}
     def to_metadata(self, log, entry):  # {{{
-        title = unicode(entry.xpath(u'normalize-space(.//div[@itemprop="name"][1]/text())'))
+        title = type(u'')(entry.xpath(u'normalize-space(.//div[@itemprop="name"][1]/text())'))
         # log.debug(u'Title: -----> %s' % title)
-        author = unicode(entry.xpath(u'normalize-space(.//div[contains(@class, "mPerson")])'))
+        author = type(u'')(entry.xpath(u'normalize-space(.//div[contains(@class, "mPerson")])'))
         # log.debug(u'Author: -----> %s' % author)
-        norm_authors = map(_normalizeAuthorNameWithInitials, map(unicode.strip, unicode(author).split(u',')))
+        norm_authors = map(_normalizeAuthorNameWithInitials, map(type(u'').strip, type(u'')(author).split(u',')))
         mi = Metadata(title, norm_authors)
         ozon_id = entry.get('data-href').split('/')[-2]
@@ -524,7 +524,7 @@ class Ozon(Source):
         # comments, from Javascript data
         beginning = fullString.find(u'FirstBlock')
         end = fullString.find(u'}', beginning)
-        comments = unicode(fullString[beginning + 75:end - 1]).decode("unicode-escape")
+        comments = type(u'')(fullString[beginning + 75:end - 1]).decode("unicode-escape")
         metadata.comments = replace_entities(comments, 'utf-8')
     # }}}
@@ -603,7 +603,7 @@ def _format_isbn(log, isbn):  # {{{
 def _translageLanguageToCode(displayLang):  # {{{
-    displayLang = unicode(displayLang).strip() if displayLang else None
+    displayLang = type(u'')(displayLang).strip() if displayLang else None
     langTbl = {None: 'ru',
                u'Русский': 'ru',
                u'Немецкий': 'de',
@@ -627,9 +627,9 @@ def _normalizeAuthorNameWithInitials(name):  # {{{
     if name:
         re1 = r'^(?P<lname>\S+)\s+(?P<fname>[^\d\W]\.)(?:\s*(?P<mname>[^\d\W]\.))?$'
         re2 = r'^(?P<fname>[^\d\W]\.)(?:\s*(?P<mname>[^\d\W]\.))?\s+(?P<lname>\S+)$'
-        matcher = re.match(re1, unicode(name), re.UNICODE)
+        matcher = re.match(re1, type(u'')(name), re.UNICODE)
         if not matcher:
-            matcher = re.match(re2, unicode(name), re.UNICODE)
+            matcher = re.match(re2, type(u'')(name), re.UNICODE)
         if matcher:
             d = matcher.groupdict()
@@ -653,7 +653,7 @@ def toPubdate(log, yearAsString):  # {{{
 # }}}
 def _listToUnicodePrintStr(lst):  # {{{
-    return u'[' + u', '.join(unicode(x) for x in lst) + u']'
+    return u'[' + u', '.join(type(u'')(x) for x in lst) + u']'
 # }}}

View File

@@ -26,7 +26,7 @@ Result = namedtuple('Result', 'url title cached_url')
 def tostring(elem):
-    return etree.tostring(elem, encoding=unicode, method='text', with_tail=False)
+    return etree.tostring(elem, encoding='unicode', method='text', with_tail=False)
 def browser():

View File

@@ -128,11 +128,11 @@ class Textile(object):
     pnct = r'[-!"#$%&()*+,/:;<=>?@\'\[\\\]\.^_`{|}~]'
     # urlch = r'[\w"$\-_.+!*\'(),";/?:@=&%#{}|\\^~\[\]`]'
-    urlch = '[\w"$\-_.+*\'(),";\/?:@=&%#{}|\\^~\[\]`]'
+    urlch = r'[\w"$\-_.+*\'(),";\/?:@=&%#{}|\\^~\[\]`]'
     url_schemes = ('http', 'https', 'ftp', 'mailto')
-    btag = ('bq', 'bc', 'notextile', 'pre', 'h[1-6]', 'fn\d+', 'p')
+    btag = ('bq', 'bc', 'notextile', 'pre', 'h[1-6]', r'fn\d+', 'p')
     btag_lite = ('bq', 'bc', 'p')
     macro_defaults = [
@@ -292,7 +292,7 @@ class Textile(object):
         """
         self.html_type = html_type
-        # text = unicode(text)
+        # text = type(u'')(text)
         text = _normalize_newlines(text)
         if self.restricted:

View File

@@ -21,7 +21,9 @@ class BiblioStore(BasicStoreConfig, OpenSearchOPDSStore):
     def search(self, query, max_results=10, timeout=60):
         # check for cyrillic symbols before performing search
-        uquery = unicode(query.strip(), 'utf-8')
+        if isinstance(query, bytes):
+            query = query.decode('utf-8')
+        uquery = query.strip()
         reObj = re.search(u'^[а-яА-Я\\d\\s]{3,}$', uquery)
         if not reObj:
             return
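The three-line replacement above is the commit's bytes-versus-text normalisation pattern for store plugins: decoding with an explicit codec only makes sense for bytes, and under Python 3 the incoming query is usually already text. A sketch of the pattern in isolation (normalize_query is a name invented here for illustration):

def normalize_query(query):
    # Only bytes need decoding; text passes through unchanged
    if isinstance(query, bytes):
        query = query.decode('utf-8')
    return query.strip()

assert normalize_query(b'  foo ') == 'foo'
assert normalize_query(u'  foo ') == 'foo'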

View File

@@ -43,7 +43,9 @@ class ChitankaStore(BasicStoreConfig, StorePlugin):
     def search(self, query, max_results=10, timeout=60):
         # check for cyrillic symbols before performing search
-        uquery = unicode(query.strip(), 'utf-8')
+        if isinstance(query, bytes):
+            query = query.decode('utf-8')
+        uquery = query.strip()
         reObj = re.search(u'^[а-яА-Я\\d\\s]{3,}$', uquery)
         if not reObj:
             return
@@ -56,7 +58,7 @@ class ChitankaStore(BasicStoreConfig, StorePlugin):
         br = browser()
         try:
             with closing(br.open(url, timeout=timeout)) as f:
-                f = unicode(f.read(), 'utf-8')
+                f = f.read().decode('utf-8')
                 doc = html.fromstring(f)
                 for data in doc.xpath('//ul[@class="superlist booklist"]/li'):
@@ -98,7 +100,7 @@ class ChitankaStore(BasicStoreConfig, StorePlugin):
                 with closing(br2.open(base_url + author_url, timeout=timeout)) as f:
                     if counter <= 0:
                         break
-                    f = unicode(f.read(), 'utf-8')
+                    f = f.read().decode('utf-8')
                     doc2 = html.fromstring(f)
                     # search for book title

View File

@@ -22,10 +22,9 @@ class EbooksGratuitsStore(BasicStoreConfig, OpenSearchOPDSStore):
         return ascii_text(s)
     def search(self, query, max_results=10, timeout=60):
-        query = self.strip_accents(unicode(query))
+        query = self.strip_accents(type(u'')(query))
         for s in OpenSearchOPDSStore.search(self, query, max_results, timeout):
             if s.downloads:
                 s.drm = SearchResult.DRM_UNLOCKED
                 s.price = '$0.00'
                 yield s

View File

@@ -49,7 +49,7 @@ class eKnigiStore(BasicStoreConfig, StorePlugin):
     def search(self, query, max_results=10, timeout=60):
         # check for cyrillic symbols before performing search
-        uquery = unicode(query.strip(), 'utf-8')
+        uquery = type(u'')(query.strip(), 'utf-8')
         reObj = re.search(u'^[а-яА-Я\\d\\s]{2,}$', uquery)
         if not reObj:
             return

View File

@@ -46,7 +46,7 @@ def search_kobo(query, max_results=10, timeout=60, write_html_to=None):
         cover_url = None
         for p in select('p.title', item):
-            title = etree.tostring(p, method='text', encoding=unicode).strip()
+            title = etree.tostring(p, method='text', encoding='unicode').strip()
             for a in select('a[href]', p):
                 url = a.get('href')
                 break
@@ -58,11 +58,11 @@ def search_kobo(query, max_results=10, timeout=60, write_html_to=None):
         authors = []
         for a in select('p.contributor-list a.contributor-name', item):
-            authors.append(etree.tostring(a, method='text', encoding=unicode).strip())
+            authors.append(etree.tostring(a, method='text', encoding='unicode').strip())
         authors = authors_to_string(authors)
         for p in select('p.price', item):
-            price = etree.tostring(p, method='text', encoding=unicode).strip()
+            price = etree.tostring(p, method='text', encoding='unicode').strip()
             break
         else:
             price = None

View File

@@ -88,7 +88,7 @@ class LitResStore(BasicStoreConfig, StorePlugin):
         authors = data.xpath('.//title-info/author/first-name/text()|'
                              './/title-info/author/middle-name/text()|'
                              './/title-info/author/last-name/text()')
-        sRes.author = u' '.join(map(unicode, authors))
+        sRes.author = u' '.join(map(type(u''), authors))
         sRes.price = data.xpath(xp_template.format('price'))
         # cover vs cover_preview
         sRes.cover_url = data.xpath(xp_template.format('cover_preview'))
@@ -107,7 +107,7 @@ def format_price_in_RUR(price):
     @return: formatted price if possible otherwise original value
     @rtype: unicode
     '''
-    if price and re.match("^\d*?\.\d*?$", price):
+    if price and re.match(r"^\d*?\.\d*?$", price):
         try:
             price = u'{:,.2F} руб.'.format(float(price))
             price = price.replace(',', ' ').replace('.', ',', 1)

View File

@@ -67,7 +67,7 @@ class AdvSearchBuilderDialog(QDialog, Ui_Dialog):
             self.mc = '='
         else:
             self.mc = '~'
-        all, any, phrase, none = map(lambda x: unicode(x.text()),
+        all, any, phrase, none = map(lambda x: type(u'')(x.text()),
                 (self.all, self.any, self.phrase, self.none))
         all, any, none = map(self.tokens, (all, any, none))
         phrase = phrase.strip()
@@ -86,11 +86,11 @@ class AdvSearchBuilderDialog(QDialog, Ui_Dialog):
         return ans
     def token(self):
-        txt = unicode(self.text.text()).strip()
+        txt = type(u'')(self.text.text()).strip()
         if txt:
             if self.negate.isChecked():
                 txt = '!'+txt
-            tok = self.FIELDS[unicode(self.field.currentText())]+txt
+            tok = self.FIELDS[type(u'')(self.field.currentText())]+txt
             if re.search(r'\s', tok):
                 tok = '"%s"'%tok
             return tok
@@ -106,13 +106,13 @@ class AdvSearchBuilderDialog(QDialog, Ui_Dialog):
         ans = []
         self.box_last_values = {}
-        title = unicode(self.title_box.text()).strip()
+        title = type(u'')(self.title_box.text()).strip()
         if title:
             ans.append('title:"' + self.mc + title + '"')
-        author = unicode(self.author_box.text()).strip()
+        author = type(u'')(self.author_box.text()).strip()
         if author:
             ans.append('author:"' + self.mc + author + '"')
-        format = unicode(self.format_box.text()).strip()
+        format = type(u'')(self.format_box.text()).strip()
         if format:
             ans.append('format:"' + self.mc + format + '"')
         if ans:

View File

@@ -22,7 +22,7 @@ class CacheUpdateThread(Thread, QObject):
     total_changed = pyqtSignal(int)
     update_progress = pyqtSignal(int)
-    update_details = pyqtSignal(unicode)
+    update_details = pyqtSignal(type(u''))
     def __init__(self, config, seralize_books_function, timeout):
         Thread.__init__(self)

View File

@@ -105,7 +105,7 @@ class BooksModel(QAbstractItemModel):
             return
         descending = order == Qt.DescendingOrder
         self.books.sort(None,
-            lambda x: sort_key(unicode(self.data_as_text(x, col))),
+            lambda x: sort_key(type(u'')(self.data_as_text(x, col))),
             descending)
         if reset:
             self.beginResetModel(), self.endResetModel()

View File

@@ -40,7 +40,7 @@ class MobileReadStoreDialog(QDialog, Ui_Dialog):
         self.restore_state()
     def do_search(self):
-        self.results_view.model().search(unicode(self.search_query.text()))
+        self.results_view.model().search(type(u'')(self.search_query.text()))
     def open_store(self, index):
         result = self.results_view.model().get_book(index)

View File

@@ -6,9 +6,13 @@ from __future__ import absolute_import, division, print_function, unicode_literals
 import sys
-from calibre.constants import iswindows, preferred_encoding
+from calibre.constants import iswindows, preferred_encoding, ispy3
+if ispy3:
+    from getpass import getpass
+    getpass
+else:
     def getpass(prompt):
         if iswindows:
             # getpass is broken on windows with python 2.x and unicode, the

View File

@@ -9,7 +9,7 @@ from calibre.web.feeds.news import (BasicNewsRecipe, CustomIndexRecipe,
                                     AutomaticNewsRecipe, CalibrePeriodical)
 from calibre.ebooks.BeautifulSoup import BeautifulSoup
 from calibre.utils.config import JSONConfig
-from polyglot.builtins import unicode_type
+from polyglot.builtins import unicode_type, codepoint_to_chr
 basic_recipes = (BasicNewsRecipe, AutomaticNewsRecipe, CustomIndexRecipe,
                  CalibrePeriodical)
@@ -44,7 +44,9 @@ def compile_recipe(src):
         'BasicNewsRecipe':BasicNewsRecipe,
         'AutomaticNewsRecipe':AutomaticNewsRecipe,
         'time':time, 're':re,
-        'BeautifulSoup':BeautifulSoup
+        'BeautifulSoup':BeautifulSoup,
+        'unicode': unicode_type,
+        'unichr': codepoint_to_chr,
     }
     exec(src, namespace)
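Because compile_recipe executes recipe source with exec(src, namespace), injecting 'unicode' and 'unichr' into that namespace lets legacy Python 2 recipes run unchanged on Python 3. A rough, self-contained sketch of the mechanism (the recipe source below is a made-up example, not a real calibre recipe):

unicode_type = type(u'')
try:
    codepoint_to_chr = unichr  # Python 2
except NameError:
    codepoint_to_chr = chr     # Python 3

src = "result = unicode('feed title') + unichr(0x2022)"  # legacy recipe code
namespace = {'unicode': unicode_type, 'unichr': codepoint_to_chr}
exec(src, namespace)  # name lookups resolve against the injected namespace
assert namespace['result'].endswith(u'\u2022')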

View File

@@ -15,20 +15,17 @@ import operator
 import string
 from css_selectors.errors import SelectorSyntaxError, ExpressionError
+from polyglot.builtins import unicode_type, codepoint_to_chr
-if sys.version_info[0] < 3:
-    _unicode = unicode
-    _unichr = unichr
-else:
-    _unicode = str
-    _unichr = chr
 tab = string.maketrans(string.ascii_uppercase, string.ascii_lowercase)
 utab = {c:c+32 for c in range(ord('A'), ord('Z')+1)}
 def ascii_lower(string):
     """Lower-case, but only in the ASCII range."""
-    return string.translate(utab if isinstance(string, _unicode) else tab)
+    return string.translate(utab if isinstance(string, unicode_type) else tab)
 def urepr(x):
     if isinstance(x, list):
@@ -38,6 +35,7 @@ def urepr(x):
         ans = ans[1:]
     return ans
 # Parsed objects
 class Selector(object):
@@ -385,6 +383,7 @@ def parse_selector_group(stream):
         else:
             break
 def parse_selector(stream):
     result, pseudo_element = parse_simple_selector(stream)
     while 1:
@@ -461,7 +460,7 @@ def parse_simple_selector(stream, inside_negation=False):
                     'before', 'after'):
                 # Special case: CSS 2.1 pseudo-elements can have a single ':'
                 # Any new pseudo-element must have two.
-                pseudo_element = _unicode(ident)
+                pseudo_element = unicode_type(ident)
                 continue
             if stream.peek() != ('DELIM', '('):
                 result = Pseudo(result, ident)
@@ -626,11 +625,13 @@ class TokenMacros:
     nmchar = '[_a-z0-9-]|%s|%s' % (escape, nonascii)
     nmstart = '[_a-z]|%s|%s' % (escape, nonascii)
 def _compile(pattern):
     return re.compile(pattern % vars(TokenMacros), re.IGNORECASE).match
 _match_whitespace = _compile(r'[ \t\r\n\f]+')
-_match_number = _compile('[+-]?(?:[0-9]*\.[0-9]+|[0-9]+)')
+_match_number = _compile(r'[+-]?(?:[0-9]*\.[0-9]+|[0-9]+)')
 _match_hash = _compile('#(?:%(nmchar)s)+')
 _match_ident = _compile('-?(?:%(nmstart)s)(?:%(nmchar)s)*')
 _match_string_by_quote = {
@@ -650,11 +651,12 @@ else:
     def _replace_simple(match):
         return match.group(1)
     def _replace_unicode(match):
         codepoint = int(match.group(1), 16)
         if codepoint > sys.maxunicode:
             codepoint = 0xFFFD
-        return _unichr(codepoint)
+        return codepoint_to_chr(codepoint)
     def unescape_ident(value):

View File

@@ -29,7 +29,7 @@ def make_NCName(arg):
     return arg
 def cnv_anyURI(attribute, arg, element):
-    return unicode(arg)
+    return type(u'')(arg)
 def cnv_boolean(attribute, arg, element):
     if arg.lower() in ("false","no"):
@@ -85,13 +85,13 @@ def cnv_family(attribute, arg, element):
 def __save_prefix(attribute, arg, element):
     prefix = arg.split(':',1)[0]
     if prefix == arg:
-        return unicode(arg)
+        return type(u'')(arg)
     namespace = element.get_knownns(prefix)
     if namespace is None:
         #raise ValueError, "'%s' is an unknown prefix" % str(prefix)
-        return unicode(arg)
+        return type(u'')(arg)
     p = element.get_nsprefix(namespace)
-    return unicode(arg)
+    return type(u'')(arg)
 def cnv_formula(attribute, arg, element):
     """ A string containing a formula. Formulas do not have a predefined syntax, but the string should
@@ -218,7 +218,7 @@ def cnv_positiveInteger(attribute, arg, element):
     return str(arg)
 def cnv_string(attribute, arg, element):
-    return unicode(arg)
+    return type(u'')(arg)
 def cnv_textnoteclass(attribute, arg, element):
     if str(arg) not in ("footnote", "endnote"):
@@ -1480,5 +1480,4 @@ class AttrConverters:
         conversion = attrconverters.get((attribute, None), None)
         if conversion is not None:
             return conversion(attribute, value, element)
-        return unicode(value)
+        return type(u'')(value)

View File

@@ -182,7 +182,7 @@ class Node(xml.dom.Node):
     def __unicode__(self):
         val = []
         for c in self.childNodes:
-            val.append(unicode(c))
+            val.append(type(u'')(c))
         return u''.join(val)
 defproperty(Node, "firstChild", doc="First child node, or None.")
@@ -253,7 +253,7 @@ class Text(Childless, Node):
     def toXml(self,level,f):
         """ Write XML in UTF-8 """
         if self.data:
-            f.write(_escape(unicode(self.data).encode('utf-8')))
+            f.write(_escape(type(u'')(self.data).encode('utf-8')))
 class CDATASection(Childless, Text):
     nodeType = Node.CDATA_SECTION_NODE
@@ -469,7 +469,7 @@ class Element(Node):
                 f.write(' xmlns:' + prefix + '="'+ _escape(str(namespace))+'"')
         for qname in self.attributes.keys():
             prefix = self.get_nsprefix(qname[0])
-            f.write(' '+_escape(str(prefix+':'+qname[1]))+'='+_quoteattr(unicode(self.attributes[qname]).encode('utf-8')))
+            f.write(' '+_escape(str(prefix+':'+qname[1]))+'='+_quoteattr(type(u'')(self.attributes[qname]).encode('utf-8')))
         f.write('>')
     def write_close_tag(self, level, f):
@@ -483,7 +483,7 @@ class Element(Node):
                 f.write(' xmlns:' + prefix + '="'+ _escape(str(namespace))+'"')
         for qname in self.attributes.keys():
             prefix = self.get_nsprefix(qname[0])
-            f.write(' '+_escape(str(prefix+':'+qname[1]))+'='+_quoteattr(unicode(self.attributes[qname]).encode('utf-8')))
+            f.write(' '+_escape(str(prefix+':'+qname[1]))+'='+_quoteattr(type(u'')(self.attributes[qname]).encode('utf-8')))
         if self.childNodes:
             f.write('>')
             for element in self.childNodes:
@@ -509,5 +509,3 @@ class Element(Node):
         """ This is a check to see if the object is an instance of a type """
         obj = element(check_grammar=False)
         return self.qname == obj.qname

View File

@@ -55,6 +55,7 @@ if False:  # Added by Kovid
 # character etc. styles Since CSS2 has no scope we use a prefix. (Not elegant)
 # In ODF a style can have a parent, these parents can be chained.
 class StyleToCSS:
     """ The purpose of the StyleToCSS class is to contain the rules to convert
@@ -317,6 +318,7 @@ class TagStack:
             if attr in attrs:
                 return attrs[attr]
         return None
     def count_tags(self, tag):
         c = 0
         for ttag, tattrs in self.stack:
@@ -324,6 +326,7 @@ class TagStack:
                 c = c + 1
         return c
 special_styles = {
    'S-Emphasis':'em',
    'S-Citation':'cite',
@@ -352,6 +355,8 @@ special_styles = {
 # ODFCONTENTHANDLER
 #
 # -----------------------------------------------------------------------------
 class ODF2XHTML(handler.ContentHandler):
     """ The ODF2XHTML parses an ODF file and produces XHTML"""
@@ -625,9 +630,6 @@ class ODF2XHTML(handler.ContentHandler):
             self.anchors[name] = "anchor%d" % (len(self.anchors) + 1)
         return self.anchors.get(name)
-    # --------------------------------------------------
     def purgedata(self):
         self.data = []
@@ -1457,7 +1459,7 @@ dl.notes dd:last-of-type { page-break-after: avoid }
         # self.writeout( escape(mark) )
         # Since HTML only knows about endnotes, there is too much risk that the
         # marker is reused in the source. Therefore we force numeric markers
-        self.writeout(unicode(self.currentnote))
+        self.writeout(type(u'')(self.currentnote))
         self.closetag('a')
         self.closetag('sup')
@@ -1566,7 +1568,6 @@ dl.notes dd:last-of-type { page-break-after: avoid }
         self.writedata()
         self.purgedata()
-# -----------------------------------------------------------------------------
 #
 # Reading the file
@@ -1593,7 +1594,7 @@ dl.notes dd:last-of-type { page-break-after: avoid }
                 self._walknode(c)
             self.endElementNS(node.qname, node.tagName)
         if node.nodeType == Node.TEXT_NODE or node.nodeType == Node.CDATA_SECTION_NODE:
-            self.characters(unicode(node))
+            self.characters(type(u'')(node))
     def odf2xhtml(self, odffile):
         """ Load a file and return the XHTML

View File

@@ -8,10 +8,6 @@ __copyright__ = '2014, Kovid Goyal <kovid at kovidgoyal.net>'
 import unittest
-try:
-    unicode
-except NameError:
-    unicode = str
 def jsonify(tokens):
     """Turn tokens into "JSON-compatible" data structures."""
@@ -24,6 +20,7 @@ def jsonify(tokens):
     else:
         yield token.type, token.value
 class BaseTest(unittest.TestCase):
     longMessage = True
@@ -34,10 +31,8 @@ class BaseTest(unittest.TestCase):
         """Test not complete error messages but only substrings."""
         self.ae(len(errors), len(expected_errors))
         for error, expected in zip(errors, expected_errors):
-            self.assertIn(expected, unicode(error))
+            self.assertIn(expected, type(u'')(error))
     def jsonify_declarations(self, rule):
         return [(decl.name, list(jsonify(decl.value)))
                 for decl in rule.declarations]