diff --git a/imgsrc/srv/generate.py b/imgsrc/srv/generate.py index c2a8ae9f14..c21c37d643 100644 --- a/imgsrc/srv/generate.py +++ b/imgsrc/srv/generate.py @@ -36,7 +36,7 @@ def merge(): for child in svg.iterchildren('*'): clone_node(child, symbol) ans.append(symbol) - ans = etree.tostring(ans, encoding=unicode, pretty_print=True, with_tail=False) + ans = etree.tostring(ans, encoding='unicode', pretty_print=True, with_tail=False) ans = re.sub(']+>', '', ans, count=1) return ans diff --git a/manual/conf.py b/manual/conf.py index 5ffab2fbb9..0649b64c63 100644 --- a/manual/conf.py +++ b/manual/conf.py @@ -168,7 +168,7 @@ def sort_languages(x): lc, name = x if lc == language: return '' - return sort_key(unicode(name)) + return sort_key(type(u'')(name)) html_context['other_languages'].sort(key=sort_languages) diff --git a/manual/custom.py b/manual/custom.py index 2f6a402bd4..2c9b31d514 100644 --- a/manual/custom.py +++ b/manual/custom.py @@ -198,7 +198,7 @@ def generate_ebook_convert_help(preamble, app): def update_cli_doc(name, raw, app): - if isinstance(raw, unicode): + if isinstance(raw, type(u'')): raw = raw.encode('utf-8') path = 'generated/%s/%s.rst' % (app.config.language, name) old_raw = open(path, 'rb').read() if os.path.exists(path) else '' diff --git a/manual/plugin_examples/interface_demo/config.py b/manual/plugin_examples/interface_demo/config.py index 4f87eba98b..a8cd4ab786 100644 --- a/manual/plugin_examples/interface_demo/config.py +++ b/manual/plugin_examples/interface_demo/config.py @@ -21,6 +21,7 @@ prefs = JSONConfig('plugins/interface_demo') # Set defaults prefs.defaults['hello_world_msg'] = 'Hello, World!' + class ConfigWidget(QWidget): def __init__(self): @@ -37,5 +38,4 @@ class ConfigWidget(QWidget): self.label.setBuddy(self.msg) def save_settings(self): - prefs['hello_world_msg'] = unicode(self.msg.text()) - + prefs['hello_world_msg'] = self.msg.text() diff --git a/setup/__init__.py b/setup/__init__.py index 0f9b840120..a6e86df9db 100644 --- a/setup/__init__.py +++ b/setup/__init__.py @@ -139,7 +139,7 @@ else: enc = preferred_encoding safe_encode = kwargs.get('safe_encode', False) for i, arg in enumerate(args): - if isinstance(arg, unicode): + if isinstance(arg, type(u'')): try: arg = arg.encode(enc) except UnicodeEncodeError: @@ -150,8 +150,8 @@ else: try: arg = str(arg) except ValueError: - arg = unicode(arg) - if isinstance(arg, unicode): + arg = type(u'')(arg) + if isinstance(arg, type(u'')): try: arg = arg.encode(enc) except UnicodeEncodeError: diff --git a/src/calibre/ebooks/BeautifulSoup.py b/src/calibre/ebooks/BeautifulSoup.py index dd2a6b80b3..d78376fb48 100644 --- a/src/calibre/ebooks/BeautifulSoup.py +++ b/src/calibre/ebooks/BeautifulSoup.py @@ -1795,41 +1795,41 @@ class UnicodeDammit: elif xml_data[:4] == '\x00\x3c\x00\x3f': # UTF-16BE sniffed_xml_encoding = 'utf-16be' - #xml_data = unicode(xml_data, 'utf-16be').encode('utf-8') + #xml_data = type(u'')(xml_data, 'utf-16be').encode('utf-8') elif (len(xml_data) >= 4) and (xml_data[:2] == '\xfe\xff') \ and (xml_data[2:4] != '\x00\x00'): # UTF-16BE with BOM sniffed_xml_encoding = 'utf-16be' - #xml_data = unicode(xml_data[2:], 'utf-16be').encode('utf-8') + #xml_data = type(u'')(xml_data[2:], 'utf-16be').encode('utf-8') elif xml_data[:4] == '\x3c\x00\x3f\x00': # UTF-16LE sniffed_xml_encoding = 'utf-16le' - #xml_data = unicode(xml_data, 'utf-16le').encode('utf-8') + #xml_data = type(u'')(xml_data, 'utf-16le').encode('utf-8') elif (len(xml_data) >= 4) and (xml_data[:2] == '\xff\xfe') and \ (xml_data[2:4] != '\x00\x00'): # UTF-16LE with BOM sniffed_xml_encoding = 'utf-16le' - #xml_data = unicode(xml_data[2:], 'utf-16le').encode('utf-8') + #xml_data = type(u'')(xml_data[2:], 'utf-16le').encode('utf-8') elif xml_data[:4] == '\x00\x00\x00\x3c': # UTF-32BE sniffed_xml_encoding = 'utf-32be' - #xml_data = unicode(xml_data, 'utf-32be').encode('utf-8') + #xml_data = type(u'')(xml_data, 'utf-32be').encode('utf-8') elif xml_data[:4] == '\x3c\x00\x00\x00': # UTF-32LE sniffed_xml_encoding = 'utf-32le' - #xml_data = unicode(xml_data, 'utf-32le').encode('utf-8') + #xml_data = type(u'')(xml_data, 'utf-32le').encode('utf-8') elif xml_data[:4] == '\x00\x00\xfe\xff': # UTF-32BE with BOM sniffed_xml_encoding = 'utf-32be' - #xml_data = unicode(xml_data[4:], 'utf-32be').encode('utf-8') + #xml_data = type(u'')(xml_data[4:], 'utf-32be').encode('utf-8') elif xml_data[:4] == '\xff\xfe\x00\x00': # UTF-32LE with BOM sniffed_xml_encoding = 'utf-32le' - #xml_data = unicode(xml_data[4:], 'utf-32le').encode('utf-8') + #xml_data = type(u'')(xml_data[4:], 'utf-32le').encode('utf-8') elif xml_data[:3] == '\xef\xbb\xbf': # UTF-8 with BOM sniffed_xml_encoding = 'utf-8' - #xml_data = unicode(xml_data[3:], 'utf-8').encode('utf-8') + #xml_data = type(u'')(xml_data[3:], 'utf-8').encode('utf-8') else: sniffed_xml_encoding = 'ascii' pass diff --git a/src/calibre/ebooks/metadata/sources/amazon.py b/src/calibre/ebooks/metadata/sources/amazon.py index 70f70b1052..da64255ee5 100644 --- a/src/calibre/ebooks/metadata/sources/amazon.py +++ b/src/calibre/ebooks/metadata/sources/amazon.py @@ -93,7 +93,7 @@ def parse_details_page(url, log, timeout, browser, domain): errmsg = root.xpath('//*[@id="errorMessage"]') if errmsg: msg = 'Failed to parse amazon details page: %r' % url - msg += tostring(errmsg, method='text', encoding=unicode).strip() + msg += tostring(errmsg, method='text', encoding='unicode').strip() log.error(msg) return @@ -466,7 +466,7 @@ class Worker(Thread): # Get details {{{ self.result_queue.put(mi) def totext(self, elem): - return self.tostring(elem, encoding=unicode, method='text').strip() + return self.tostring(elem, encoding='unicode', method='text').strip() def parse_title(self, root): h1 = root.xpath('//h1[@id="title"]') @@ -478,10 +478,10 @@ class Worker(Thread): # Get details {{{ tdiv = root.xpath('//h1[contains(@class, "parseasinTitle")]')[0] actual_title = tdiv.xpath('descendant::*[@id="btAsinTitle"]') if actual_title: - title = self.tostring(actual_title[0], encoding=unicode, + title = self.tostring(actual_title[0], encoding='unicode', method='text').strip() else: - title = self.tostring(tdiv, encoding=unicode, + title = self.tostring(tdiv, encoding='unicode', method='text').strip() ans = re.sub(r'[(\[].*[)\]]', '', title).strip() if not ans: @@ -508,7 +508,7 @@ class Worker(Thread): # Get details {{{ ''') for x in aname: x.tail = '' - authors = [self.tostring(x, encoding=unicode, method='text').strip() for x + authors = [self.tostring(x, encoding='unicode', method='text').strip() for x in aname] authors = [a for a in authors if a] return authors @@ -559,7 +559,7 @@ class Worker(Thread): # Get details {{{ for a in desc.xpath('descendant::a[@href]'): del a.attrib['href'] a.tag = 'span' - desc = self.tostring(desc, method='html', encoding=unicode).strip() + desc = self.tostring(desc, method='html', encoding='unicode').strip() # Encoding bug in Amazon data U+fffd (replacement char) # in some examples it is present in place of ' @@ -626,14 +626,14 @@ class Worker(Thread): # Get details {{{ spans = series.xpath('./span') if spans: raw = self.tostring( - spans[0], encoding=unicode, method='text', with_tail=False).strip() + spans[0], encoding='unicode', method='text', with_tail=False).strip() m = re.search(r'\s+([0-9.]+)$', raw.strip()) if m is not None: series_index = float(m.group(1)) s = series.xpath('./a[@id="series-page-link"]') if s: series = self.tostring( - s[0], encoding=unicode, method='text', with_tail=False).strip() + s[0], encoding='unicode', method='text', with_tail=False).strip() if series: ans = (series, series_index) # This is found on Kindle edition pages on amazon.com @@ -646,7 +646,7 @@ class Worker(Thread): # Get details {{{ a = span.xpath('./a[@href]') if a: series = self.tostring( - a[0], encoding=unicode, method='text', with_tail=False).strip() + a[0], encoding='unicode', method='text', with_tail=False).strip() if series: ans = (series, series_index) # This is found on newer Kindle edition pages on amazon.com @@ -659,14 +659,14 @@ class Worker(Thread): # Get details {{{ a = b.getparent().xpath('./a[@href]') if a: series = self.tostring( - a[0], encoding=unicode, method='text', with_tail=False).partition('(')[0].strip() + a[0], encoding='unicode', method='text', with_tail=False).partition('(')[0].strip() if series: ans = series, series_index if ans == (None, None): desc = root.xpath('//div[@id="ps-content"]/div[@class="buying"]') if desc: - raw = self.tostring(desc[0], method='text', encoding=unicode) + raw = self.tostring(desc[0], method='text', encoding='unicode') raw = re.sub(r'\s+', ' ', raw) match = self.series_pat.search(raw) if match is not None: @@ -1161,7 +1161,7 @@ class Amazon(Source): if not result_links: result_links = root.xpath(r'//li[starts-with(@id, "result_")]//a[@href and contains(@class, "s-access-detail-page")]') for a in result_links: - title = tostring(a, method='text', encoding=unicode) + title = tostring(a, method='text', encoding='unicode') if title_ok(title): url = a.get('href') if url.startswith('/'): @@ -1177,7 +1177,7 @@ class Amazon(Source): # New amazon markup links = div.xpath('descendant::h3/a[@href]') for a in links: - title = tostring(a, method='text', encoding=unicode) + title = tostring(a, method='text', encoding='unicode') if title_ok(title): url = a.get('href') if url.startswith('/'): @@ -1192,7 +1192,7 @@ class Amazon(Source): for td in root.xpath( r'//div[@id="Results"]/descendant::td[starts-with(@id, "search:Td:")]'): for a in td.xpath(r'descendant::td[@class="dataColumn"]/descendant::a[@href]/span[@class="srTitle"]/..'): - title = tostring(a, method='text', encoding=unicode) + title = tostring(a, method='text', encoding='unicode') if title_ok(title): url = a.get('href') if url.startswith('/'): diff --git a/src/calibre/ebooks/metadata/sources/cli.py b/src/calibre/ebooks/metadata/sources/cli.py index 6424ab5041..d7d5153cf9 100644 --- a/src/calibre/ebooks/metadata/sources/cli.py +++ b/src/calibre/ebooks/metadata/sources/cli.py @@ -99,7 +99,7 @@ def main(args=sys.argv): log = buf.getvalue() result = (metadata_to_opf(result) if opts.opf else - unicode(result).encode('utf-8')) + type(u'')(result).encode('utf-8')) if opts.verbose: print (log, file=sys.stderr) diff --git a/src/calibre/ebooks/metadata/sources/douban.py b/src/calibre/ebooks/metadata/sources/douban.py index 77b8b392f3..5248565b47 100644 --- a/src/calibre/ebooks/metadata/sources/douban.py +++ b/src/calibre/ebooks/metadata/sources/douban.py @@ -203,7 +203,7 @@ class Douban(Source): build_term('author', author_tokens)) t = 'search' q = q.strip() - if isinstance(q, unicode): + if isinstance(q, type(u'')): q = q.encode('utf-8') if not q: return None diff --git a/src/calibre/ebooks/metadata/sources/edelweiss.py b/src/calibre/ebooks/metadata/sources/edelweiss.py index fd6425eeac..79867ddcb7 100644 --- a/src/calibre/ebooks/metadata/sources/edelweiss.py +++ b/src/calibre/ebooks/metadata/sources/edelweiss.py @@ -31,7 +31,7 @@ def parse_html(raw): def astext(node): from lxml import etree - return etree.tostring(node, method='text', encoding=unicode, + return etree.tostring(node, method='text', encoding='unicode', with_tail=False).strip() @@ -110,7 +110,7 @@ class Worker(Thread): # {{{ for a in desc.xpath('descendant::a[@href]'): del a.attrib['href'] a.tag = 'span' - desc = etree.tostring(desc, method='html', encoding=unicode).strip() + desc = etree.tostring(desc, method='html', encoding='unicode').strip() # remove all attributes from tags desc = re.sub(r'<([a-zA-Z0-9]+)\s[^>]+>', r'<\1>', desc) @@ -160,7 +160,7 @@ def get_basic_data(browser, log, *skus): tags = [] rating = 0 for bar in row.xpath('descendant::*[contains(@class, "bgdColorCommunity")]/@style'): - m = re.search('width: (\d+)px;.*max-width: (\d+)px', bar) + m = re.search(r'width: (\d+)px;.*max-width: (\d+)px', bar) if m is not None: rating = float(m.group(1)) / float(m.group(2)) break @@ -283,7 +283,7 @@ class Edelweiss(Source): except Exception as e: log.exception('Failed to make identify query: %r'%query) return as_unicode(e) - items = re.search('window[.]items\s*=\s*(.+?);', raw) + items = re.search(r'window[.]items\s*=\s*(.+?);', raw) if items is None: log.error('Failed to get list of matching items') log.debug('Response text:') diff --git a/src/calibre/ebooks/metadata/sources/google.py b/src/calibre/ebooks/metadata/sources/google.py index acc85e177e..609fe51d9c 100644 --- a/src/calibre/ebooks/metadata/sources/google.py +++ b/src/calibre/ebooks/metadata/sources/google.py @@ -214,7 +214,7 @@ class GoogleBooks(Source): if author_tokens: q += ('+' if q else '') + build_term('author', author_tokens) - if isinstance(q, unicode): + if isinstance(q, type(u'')): q = q.encode('utf-8') if not q: return None diff --git a/src/calibre/ebooks/metadata/sources/identify.py b/src/calibre/ebooks/metadata/sources/identify.py index 2861f8435a..5fc4a5bfb0 100644 --- a/src/calibre/ebooks/metadata/sources/identify.py +++ b/src/calibre/ebooks/metadata/sources/identify.py @@ -471,7 +471,7 @@ def identify(log, abort, # {{{ for r in presults: log('\n\n---') try: - log(unicode(r)) + log(type(u'')(r)) except TypeError: log(repr(r)) if plog: diff --git a/src/calibre/ebooks/metadata/sources/overdrive.py b/src/calibre/ebooks/metadata/sources/overdrive.py index 51ed5c8136..471f7a669a 100755 --- a/src/calibre/ebooks/metadata/sources/overdrive.py +++ b/src/calibre/ebooks/metadata/sources/overdrive.py @@ -233,7 +233,7 @@ class OverDrive(Source): xreq.add_header('Referer', q_init_search) xreq.add_header('Accept', 'application/json, text/javascript, */*') raw = br.open_novisit(xreq).read() - for m in re.finditer(unicode(r'"iTotalDisplayRecords":(?P\d+).*?"iTotalRecords":(?P\d+)'), raw): + for m in re.finditer(type(u'')(r'"iTotalDisplayRecords":(?P\d+).*?"iTotalRecords":(?P\d+)'), raw): if int(m.group('totalrecords')) == 0: return '' elif int(m.group('displayrecords')) >= 1: @@ -450,7 +450,7 @@ class OverDrive(Source): if desc: desc = desc[0] - desc = html.tostring(desc, method='html', encoding=unicode).strip() + desc = html.tostring(desc, method='html', encoding='unicode').strip() # remove all attributes from tags desc = re.sub(r'<([a-zA-Z0-9]+)\s[^>]+>', r'<\1>', desc) # Remove comments diff --git a/src/calibre/ebooks/metadata/sources/ozon.py b/src/calibre/ebooks/metadata/sources/ozon.py index b125c45f26..0c167d90ac 100644 --- a/src/calibre/ebooks/metadata/sources/ozon.py +++ b/src/calibre/ebooks/metadata/sources/ozon.py @@ -100,7 +100,7 @@ class Ozon(Source): qItems.discard('') searchText = u' '.join(qItems).strip() - if isinstance(searchText, unicode): + if isinstance(searchText, type(u'')): searchText = searchText.encode('utf-8') if not searchText: return None @@ -148,7 +148,7 @@ class Ozon(Source): else: # Redirect page: trying to extract ozon_id from javascript data h = HTMLParser() - entry_string = (h.unescape(etree.tostring(doc, pretty_print=True, encoding=unicode))) + entry_string = (h.unescape(etree.tostring(doc, pretty_print=True, encoding='unicode'))) json_pat = re.compile(r'dataLayer\s*=\s*(.+)?;') json_info = re.search(json_pat, entry_string) jsondata = json_info.group(1) if json_info else None @@ -198,16 +198,16 @@ class Ozon(Source): reRemoveFromTitle = re.compile(r'[?!:.,;+-/&%"\'=]') - title = unicode(title).upper() if title else '' + title = type(u'')(title).upper() if title else '' if reRemoveFromTitle: title = reRemoveFromTitle.sub('', title) authors = map(_normalizeAuthorNameWithInitials, - map(unicode.upper, map(unicode, authors))) if authors else None + map(type(u'').upper, map(type(u''), authors))) if authors else None ozon_id = identifiers.get('ozon', None) # log.debug(u'ozonid: ', ozon_id) - unk = unicode(_('Unknown')).upper() + unk = type(u'')(_('Unknown')).upper() if title == unk: title = None @@ -226,7 +226,7 @@ class Ozon(Source): def calc_source_relevance(mi): # {{{ relevance = 0 if title: - mititle = unicode(mi.title).upper() if mi.title else '' + mititle = type(u'')(mi.title).upper() if mi.title else '' if reRemoveFromTitle: mititle = reRemoveFromTitle.sub('', mititle) @@ -240,7 +240,7 @@ class Ozon(Source): relevance += 1 if authors: - miauthors = map(unicode.upper, map(unicode, mi.authors)) if mi.authors else [] + miauthors = map(type(u'').upper, map(type(u''), mi.authors)) if mi.authors else [] # log.debug('Authors %s vs miauthors %s'%(','.join(authors), ','.join(miauthors))) if (in_authors(authors, miauthors)): @@ -320,13 +320,13 @@ class Ozon(Source): # }}} def to_metadata(self, log, entry): # {{{ - title = unicode(entry.xpath(u'normalize-space(.//div[@itemprop="name"][1]/text())')) + title = type(u'')(entry.xpath(u'normalize-space(.//div[@itemprop="name"][1]/text())')) # log.debug(u'Title: -----> %s' % title) - author = unicode(entry.xpath(u'normalize-space(.//div[contains(@class, "mPerson")])')) + author = type(u'')(entry.xpath(u'normalize-space(.//div[contains(@class, "mPerson")])')) # log.debug(u'Author: -----> %s' % author) - norm_authors = map(_normalizeAuthorNameWithInitials, map(unicode.strip, unicode(author).split(u','))) + norm_authors = map(_normalizeAuthorNameWithInitials, map(type(u'').strip, type(u'')(author).split(u','))) mi = Metadata(title, norm_authors) ozon_id = entry.get('data-href').split('/')[-2] @@ -524,7 +524,7 @@ class Ozon(Source): # comments, from Javascript data beginning = fullString.find(u'FirstBlock') end = fullString.find(u'}', beginning) - comments = unicode(fullString[beginning + 75:end - 1]).decode("unicode-escape") + comments = type(u'')(fullString[beginning + 75:end - 1]).decode("unicode-escape") metadata.comments = replace_entities(comments, 'utf-8') # }}} @@ -603,7 +603,7 @@ def _format_isbn(log, isbn): # {{{ def _translageLanguageToCode(displayLang): # {{{ - displayLang = unicode(displayLang).strip() if displayLang else None + displayLang = type(u'')(displayLang).strip() if displayLang else None langTbl = {None: 'ru', u'Русский': 'ru', u'Немецкий': 'de', @@ -627,9 +627,9 @@ def _normalizeAuthorNameWithInitials(name): # {{{ if name: re1 = r'^(?P\S+)\s+(?P[^\d\W]\.)(?:\s*(?P[^\d\W]\.))?$' re2 = r'^(?P[^\d\W]\.)(?:\s*(?P[^\d\W]\.))?\s+(?P\S+)$' - matcher = re.match(re1, unicode(name), re.UNICODE) + matcher = re.match(re1, type(u'')(name), re.UNICODE) if not matcher: - matcher = re.match(re2, unicode(name), re.UNICODE) + matcher = re.match(re2, type(u'')(name), re.UNICODE) if matcher: d = matcher.groupdict() @@ -653,7 +653,7 @@ def toPubdate(log, yearAsString): # {{{ # }}} def _listToUnicodePrintStr(lst): # {{{ - return u'[' + u', '.join(unicode(x) for x in lst) + u']' + return u'[' + u', '.join(type(u'')(x) for x in lst) + u']' # }}} diff --git a/src/calibre/ebooks/metadata/sources/search_engines.py b/src/calibre/ebooks/metadata/sources/search_engines.py index c8214f3c7b..1bf6fea908 100644 --- a/src/calibre/ebooks/metadata/sources/search_engines.py +++ b/src/calibre/ebooks/metadata/sources/search_engines.py @@ -26,7 +26,7 @@ Result = namedtuple('Result', 'url title cached_url') def tostring(elem): - return etree.tostring(elem, encoding=unicode, method='text', with_tail=False) + return etree.tostring(elem, encoding='unicode', method='text', with_tail=False) def browser(): diff --git a/src/calibre/ebooks/textile/functions.py b/src/calibre/ebooks/textile/functions.py index d7c559f952..8c6d5d406e 100755 --- a/src/calibre/ebooks/textile/functions.py +++ b/src/calibre/ebooks/textile/functions.py @@ -128,11 +128,11 @@ class Textile(object): pnct = r'[-!"#$%&()*+,/:;<=>?@\'\[\\\]\.^_`{|}~]' # urlch = r'[\w"$\-_.+!*\'(),";/?:@=&%#{}|\\^~\[\]`]' - urlch = '[\w"$\-_.+*\'(),";\/?:@=&%#{}|\\^~\[\]`]' + urlch = r'[\w"$\-_.+*\'(),";\/?:@=&%#{}|\\^~\[\]`]' url_schemes = ('http', 'https', 'ftp', 'mailto') - btag = ('bq', 'bc', 'notextile', 'pre', 'h[1-6]', 'fn\d+', 'p') + btag = ('bq', 'bc', 'notextile', 'pre', 'h[1-6]', r'fn\d+', 'p') btag_lite = ('bq', 'bc', 'p') macro_defaults = [ @@ -292,7 +292,7 @@ class Textile(object): """ self.html_type = html_type - # text = unicode(text) + # text = type(u'')(text) text = _normalize_newlines(text) if self.restricted: diff --git a/src/calibre/gui2/store/stores/biblio_plugin.py b/src/calibre/gui2/store/stores/biblio_plugin.py index 03873283e2..d085097c86 100644 --- a/src/calibre/gui2/store/stores/biblio_plugin.py +++ b/src/calibre/gui2/store/stores/biblio_plugin.py @@ -21,7 +21,9 @@ class BiblioStore(BasicStoreConfig, OpenSearchOPDSStore): def search(self, query, max_results=10, timeout=60): # check for cyrillic symbols before performing search - uquery = unicode(query.strip(), 'utf-8') + if isinstance(query, bytes): + query = query.decode('utf-8') + uquery = query.strip() reObj = re.search(u'^[а-яА-Я\\d\\s]{3,}$', uquery) if not reObj: return diff --git a/src/calibre/gui2/store/stores/chitanka_plugin.py b/src/calibre/gui2/store/stores/chitanka_plugin.py index 32a4f5099a..052c6fb751 100644 --- a/src/calibre/gui2/store/stores/chitanka_plugin.py +++ b/src/calibre/gui2/store/stores/chitanka_plugin.py @@ -43,7 +43,9 @@ class ChitankaStore(BasicStoreConfig, StorePlugin): def search(self, query, max_results=10, timeout=60): # check for cyrillic symbols before performing search - uquery = unicode(query.strip(), 'utf-8') + if isinstance(query, bytes): + query = query.decode('utf-8') + uquery = query.strip() reObj = re.search(u'^[а-яА-Я\\d\\s]{3,}$', uquery) if not reObj: return @@ -56,7 +58,7 @@ class ChitankaStore(BasicStoreConfig, StorePlugin): br = browser() try: with closing(br.open(url, timeout=timeout)) as f: - f = unicode(f.read(), 'utf-8') + f = f.read().decode('utf-8') doc = html.fromstring(f) for data in doc.xpath('//ul[@class="superlist booklist"]/li'): @@ -98,7 +100,7 @@ class ChitankaStore(BasicStoreConfig, StorePlugin): with closing(br2.open(base_url + author_url, timeout=timeout)) as f: if counter <= 0: break - f = unicode(f.read(), 'utf-8') + f = f.read().decode('utf-8') doc2 = html.fromstring(f) # search for book title diff --git a/src/calibre/gui2/store/stores/ebooksgratuits_plugin.py b/src/calibre/gui2/store/stores/ebooksgratuits_plugin.py index 48b27badf2..b966d2c2ce 100644 --- a/src/calibre/gui2/store/stores/ebooksgratuits_plugin.py +++ b/src/calibre/gui2/store/stores/ebooksgratuits_plugin.py @@ -22,10 +22,9 @@ class EbooksGratuitsStore(BasicStoreConfig, OpenSearchOPDSStore): return ascii_text(s) def search(self, query, max_results=10, timeout=60): - query = self.strip_accents(unicode(query)) + query = self.strip_accents(type(u'')(query)) for s in OpenSearchOPDSStore.search(self, query, max_results, timeout): if s.downloads: s.drm = SearchResult.DRM_UNLOCKED s.price = '$0.00' yield s - diff --git a/src/calibre/gui2/store/stores/eknigi_plugin.py b/src/calibre/gui2/store/stores/eknigi_plugin.py index 1eaf83e7b0..f97dd41c5d 100644 --- a/src/calibre/gui2/store/stores/eknigi_plugin.py +++ b/src/calibre/gui2/store/stores/eknigi_plugin.py @@ -49,7 +49,7 @@ class eKnigiStore(BasicStoreConfig, StorePlugin): def search(self, query, max_results=10, timeout=60): # check for cyrillic symbols before performing search - uquery = unicode(query.strip(), 'utf-8') + uquery = type(u'')(query.strip(), 'utf-8') reObj = re.search(u'^[а-яА-Я\\d\\s]{2,}$', uquery) if not reObj: return diff --git a/src/calibre/gui2/store/stores/kobo_plugin.py b/src/calibre/gui2/store/stores/kobo_plugin.py index 74114ffc72..578803c081 100644 --- a/src/calibre/gui2/store/stores/kobo_plugin.py +++ b/src/calibre/gui2/store/stores/kobo_plugin.py @@ -46,7 +46,7 @@ def search_kobo(query, max_results=10, timeout=60, write_html_to=None): cover_url = None for p in select('p.title', item): - title = etree.tostring(p, method='text', encoding=unicode).strip() + title = etree.tostring(p, method='text', encoding='unicode').strip() for a in select('a[href]', p): url = a.get('href') break @@ -58,11 +58,11 @@ def search_kobo(query, max_results=10, timeout=60, write_html_to=None): authors = [] for a in select('p.contributor-list a.contributor-name', item): - authors.append(etree.tostring(a, method='text', encoding=unicode).strip()) + authors.append(etree.tostring(a, method='text', encoding='unicode').strip()) authors = authors_to_string(authors) for p in select('p.price', item): - price = etree.tostring(p, method='text', encoding=unicode).strip() + price = etree.tostring(p, method='text', encoding='unicode').strip() break else: price = None diff --git a/src/calibre/gui2/store/stores/litres_plugin.py b/src/calibre/gui2/store/stores/litres_plugin.py index 5e67eff854..13b342cb0a 100644 --- a/src/calibre/gui2/store/stores/litres_plugin.py +++ b/src/calibre/gui2/store/stores/litres_plugin.py @@ -88,7 +88,7 @@ class LitResStore(BasicStoreConfig, StorePlugin): authors = data.xpath('.//title-info/author/first-name/text()|' './/title-info/author/middle-name/text()|' './/title-info/author/last-name/text()') - sRes.author = u' '.join(map(unicode, authors)) + sRes.author = u' '.join(map(type(u''), authors)) sRes.price = data.xpath(xp_template.format('price')) # cover vs cover_preview sRes.cover_url = data.xpath(xp_template.format('cover_preview')) @@ -107,7 +107,7 @@ def format_price_in_RUR(price): @return: formatted price if possible otherwise original value @rtype: unicode ''' - if price and re.match("^\d*?\.\d*?$", price): + if price and re.match(r"^\d*?\.\d*?$", price): try: price = u'{:,.2F} руб.'.format(float(price)) price = price.replace(',', ' ').replace('.', ',', 1) diff --git a/src/calibre/gui2/store/stores/mobileread/adv_search_builder.py b/src/calibre/gui2/store/stores/mobileread/adv_search_builder.py index df90041ddc..42b4b5f3b7 100644 --- a/src/calibre/gui2/store/stores/mobileread/adv_search_builder.py +++ b/src/calibre/gui2/store/stores/mobileread/adv_search_builder.py @@ -67,7 +67,7 @@ class AdvSearchBuilderDialog(QDialog, Ui_Dialog): self.mc = '=' else: self.mc = '~' - all, any, phrase, none = map(lambda x: unicode(x.text()), + all, any, phrase, none = map(lambda x: type(u'')(x.text()), (self.all, self.any, self.phrase, self.none)) all, any, none = map(self.tokens, (all, any, none)) phrase = phrase.strip() @@ -86,11 +86,11 @@ class AdvSearchBuilderDialog(QDialog, Ui_Dialog): return ans def token(self): - txt = unicode(self.text.text()).strip() + txt = type(u'')(self.text.text()).strip() if txt: if self.negate.isChecked(): txt = '!'+txt - tok = self.FIELDS[unicode(self.field.currentText())]+txt + tok = self.FIELDS[type(u'')(self.field.currentText())]+txt if re.search(r'\s', tok): tok = '"%s"'%tok return tok @@ -106,13 +106,13 @@ class AdvSearchBuilderDialog(QDialog, Ui_Dialog): ans = [] self.box_last_values = {} - title = unicode(self.title_box.text()).strip() + title = type(u'')(self.title_box.text()).strip() if title: ans.append('title:"' + self.mc + title + '"') - author = unicode(self.author_box.text()).strip() + author = type(u'')(self.author_box.text()).strip() if author: ans.append('author:"' + self.mc + author + '"') - format = unicode(self.format_box.text()).strip() + format = type(u'')(self.format_box.text()).strip() if format: ans.append('format:"' + self.mc + format + '"') if ans: diff --git a/src/calibre/gui2/store/stores/mobileread/cache_update_thread.py b/src/calibre/gui2/store/stores/mobileread/cache_update_thread.py index d7a8036ecd..5d604725b6 100644 --- a/src/calibre/gui2/store/stores/mobileread/cache_update_thread.py +++ b/src/calibre/gui2/store/stores/mobileread/cache_update_thread.py @@ -22,7 +22,7 @@ class CacheUpdateThread(Thread, QObject): total_changed = pyqtSignal(int) update_progress = pyqtSignal(int) - update_details = pyqtSignal(unicode) + update_details = pyqtSignal(type(u'')) def __init__(self, config, seralize_books_function, timeout): Thread.__init__(self) diff --git a/src/calibre/gui2/store/stores/mobileread/models.py b/src/calibre/gui2/store/stores/mobileread/models.py index fb3c250a16..54a5b9e0de 100644 --- a/src/calibre/gui2/store/stores/mobileread/models.py +++ b/src/calibre/gui2/store/stores/mobileread/models.py @@ -105,7 +105,7 @@ class BooksModel(QAbstractItemModel): return descending = order == Qt.DescendingOrder self.books.sort(None, - lambda x: sort_key(unicode(self.data_as_text(x, col))), + lambda x: sort_key(type(u'')(self.data_as_text(x, col))), descending) if reset: self.beginResetModel(), self.endResetModel() diff --git a/src/calibre/gui2/store/stores/mobileread/store_dialog.py b/src/calibre/gui2/store/stores/mobileread/store_dialog.py index ce8e806a58..a68dd32ebe 100644 --- a/src/calibre/gui2/store/stores/mobileread/store_dialog.py +++ b/src/calibre/gui2/store/stores/mobileread/store_dialog.py @@ -40,7 +40,7 @@ class MobileReadStoreDialog(QDialog, Ui_Dialog): self.restore_state() def do_search(self): - self.results_view.model().search(unicode(self.search_query.text())) + self.results_view.model().search(type(u'')(self.search_query.text())) def open_store(self, index): result = self.results_view.model().get_book(index) diff --git a/src/calibre/utils/unicode_getpass.py b/src/calibre/utils/unicode_getpass.py index 887938ad6e..fa027fa1ea 100644 --- a/src/calibre/utils/unicode_getpass.py +++ b/src/calibre/utils/unicode_getpass.py @@ -6,31 +6,35 @@ from __future__ import absolute_import, division, print_function, unicode_litera import sys -from calibre.constants import iswindows, preferred_encoding +from calibre.constants import iswindows, preferred_encoding, ispy3 -def getpass(prompt): - if iswindows: - # getpass is broken on windows with python 2.x and unicode, the - # below implementation is from the python 3 source code - import msvcrt - for c in prompt: - msvcrt.putwch(c) - pw = "" - while 1: - c = msvcrt.getwch() - if c == '\r' or c == '\n': - break - if c == '\003': - raise KeyboardInterrupt - if c == '\b': - pw = pw[:-1] - else: - pw = pw + c - msvcrt.putwch('\r') - msvcrt.putwch('\n') - return pw - else: - enc = getattr(sys.stdin, 'encoding', preferred_encoding) or preferred_encoding - from getpass import getpass - return getpass(prompt).decode(enc) +if ispy3: + from getpass import getpass + getpass +else: + def getpass(prompt): + if iswindows: + # getpass is broken on windows with python 2.x and unicode, the + # below implementation is from the python 3 source code + import msvcrt + for c in prompt: + msvcrt.putwch(c) + pw = "" + while 1: + c = msvcrt.getwch() + if c == '\r' or c == '\n': + break + if c == '\003': + raise KeyboardInterrupt + if c == '\b': + pw = pw[:-1] + else: + pw = pw + c + msvcrt.putwch('\r') + msvcrt.putwch('\n') + return pw + else: + enc = getattr(sys.stdin, 'encoding', preferred_encoding) or preferred_encoding + from getpass import getpass + return getpass(prompt).decode(enc) diff --git a/src/calibre/web/feeds/recipes/__init__.py b/src/calibre/web/feeds/recipes/__init__.py index a55f4795bf..c72454b827 100644 --- a/src/calibre/web/feeds/recipes/__init__.py +++ b/src/calibre/web/feeds/recipes/__init__.py @@ -9,7 +9,7 @@ from calibre.web.feeds.news import (BasicNewsRecipe, CustomIndexRecipe, AutomaticNewsRecipe, CalibrePeriodical) from calibre.ebooks.BeautifulSoup import BeautifulSoup from calibre.utils.config import JSONConfig -from polyglot.builtins import unicode_type +from polyglot.builtins import unicode_type, codepoint_to_chr basic_recipes = (BasicNewsRecipe, AutomaticNewsRecipe, CustomIndexRecipe, CalibrePeriodical) @@ -44,7 +44,9 @@ def compile_recipe(src): 'BasicNewsRecipe':BasicNewsRecipe, 'AutomaticNewsRecipe':AutomaticNewsRecipe, 'time':time, 're':re, - 'BeautifulSoup':BeautifulSoup + 'BeautifulSoup':BeautifulSoup, + 'unicode': unicode_type, + 'unichr': codepoint_to_chr, } exec(src, namespace) diff --git a/src/css_selectors/parser.py b/src/css_selectors/parser.py index 314cc73a88..189c29c0df 100644 --- a/src/css_selectors/parser.py +++ b/src/css_selectors/parser.py @@ -15,20 +15,17 @@ import operator import string from css_selectors.errors import SelectorSyntaxError, ExpressionError +from polyglot.builtins import unicode_type, codepoint_to_chr -if sys.version_info[0] < 3: - _unicode = unicode - _unichr = unichr -else: - _unicode = str - _unichr = chr tab = string.maketrans(string.ascii_uppercase, string.ascii_lowercase) utab = {c:c+32 for c in range(ord('A'), ord('Z')+1)} + def ascii_lower(string): """Lower-case, but only in the ASCII range.""" - return string.translate(utab if isinstance(string, _unicode) else tab) + return string.translate(utab if isinstance(string, unicode_type) else tab) + def urepr(x): if isinstance(x, list): @@ -38,6 +35,7 @@ def urepr(x): ans = ans[1:] return ans + # Parsed objects class Selector(object): @@ -385,6 +383,7 @@ def parse_selector_group(stream): else: break + def parse_selector(stream): result, pseudo_element = parse_simple_selector(stream) while 1: @@ -461,7 +460,7 @@ def parse_simple_selector(stream, inside_negation=False): 'before', 'after'): # Special case: CSS 2.1 pseudo-elements can have a single ':' # Any new pseudo-element must have two. - pseudo_element = _unicode(ident) + pseudo_element = unicode_type(ident) continue if stream.peek() != ('DELIM', '('): result = Pseudo(result, ident) @@ -626,11 +625,13 @@ class TokenMacros: nmchar = '[_a-z0-9-]|%s|%s' % (escape, nonascii) nmstart = '[_a-z]|%s|%s' % (escape, nonascii) + def _compile(pattern): return re.compile(pattern % vars(TokenMacros), re.IGNORECASE).match + _match_whitespace = _compile(r'[ \t\r\n\f]+') -_match_number = _compile('[+-]?(?:[0-9]*\.[0-9]+|[0-9]+)') +_match_number = _compile(r'[+-]?(?:[0-9]*\.[0-9]+|[0-9]+)') _match_hash = _compile('#(?:%(nmchar)s)+') _match_ident = _compile('-?(?:%(nmstart)s)(?:%(nmchar)s)*') _match_string_by_quote = { @@ -650,11 +651,12 @@ else: def _replace_simple(match): return match.group(1) + def _replace_unicode(match): codepoint = int(match.group(1), 16) if codepoint > sys.maxunicode: codepoint = 0xFFFD - return _unichr(codepoint) + return codepoint_to_chr(codepoint) def unescape_ident(value): diff --git a/src/odf/attrconverters.py b/src/odf/attrconverters.py index e09b0f5caf..1ebc173961 100644 --- a/src/odf/attrconverters.py +++ b/src/odf/attrconverters.py @@ -29,7 +29,7 @@ def make_NCName(arg): return arg def cnv_anyURI(attribute, arg, element): - return unicode(arg) + return type(u'')(arg) def cnv_boolean(attribute, arg, element): if arg.lower() in ("false","no"): @@ -85,13 +85,13 @@ def cnv_family(attribute, arg, element): def __save_prefix(attribute, arg, element): prefix = arg.split(':',1)[0] if prefix == arg: - return unicode(arg) + return type(u'')(arg) namespace = element.get_knownns(prefix) if namespace is None: #raise ValueError, "'%s' is an unknown prefix" % str(prefix) - return unicode(arg) + return type(u'')(arg) p = element.get_nsprefix(namespace) - return unicode(arg) + return type(u'')(arg) def cnv_formula(attribute, arg, element): """ A string containing a formula. Formulas do not have a predefined syntax, but the string should @@ -218,7 +218,7 @@ def cnv_positiveInteger(attribute, arg, element): return str(arg) def cnv_string(attribute, arg, element): - return unicode(arg) + return type(u'')(arg) def cnv_textnoteclass(attribute, arg, element): if str(arg) not in ("footnote", "endnote"): @@ -1480,5 +1480,4 @@ class AttrConverters: conversion = attrconverters.get((attribute, None), None) if conversion is not None: return conversion(attribute, value, element) - return unicode(value) - + return type(u'')(value) diff --git a/src/odf/element.py b/src/odf/element.py index 7b9310a2aa..b0a4b3d406 100644 --- a/src/odf/element.py +++ b/src/odf/element.py @@ -182,7 +182,7 @@ class Node(xml.dom.Node): def __unicode__(self): val = [] for c in self.childNodes: - val.append(unicode(c)) + val.append(type(u'')(c)) return u''.join(val) defproperty(Node, "firstChild", doc="First child node, or None.") @@ -253,8 +253,8 @@ class Text(Childless, Node): def toXml(self,level,f): """ Write XML in UTF-8 """ if self.data: - f.write(_escape(unicode(self.data).encode('utf-8'))) - + f.write(_escape(type(u'')(self.data).encode('utf-8'))) + class CDATASection(Childless, Text): nodeType = Node.CDATA_SECTION_NODE @@ -283,7 +283,7 @@ class Element(Node): Node.TEXT_NODE, Node.CDATA_SECTION_NODE, Node.ENTITY_REFERENCE_NODE) - + def __init__(self, attributes=None, text=None, cdata=None, qname=None, qattributes=None, check_grammar=True, **args): if qname is not None: self.qname = qname @@ -334,7 +334,7 @@ class Element(Node): for ns,p in nsdict.items(): if p == prefix: return ns return None - + def get_nsprefix(self, namespace): """ Odfpy maintains a list of known namespaces. In some cases we have a namespace URL, and needs to look up or assign the prefix for it. @@ -352,7 +352,7 @@ class Element(Node): element.ownerDocument = self.ownerDocument for child in element.childNodes: self._setOwnerDoc(child) - + def addElement(self, element, check_grammar=True): """ adds an element to an Element @@ -469,7 +469,7 @@ class Element(Node): f.write(' xmlns:' + prefix + '="'+ _escape(str(namespace))+'"') for qname in self.attributes.keys(): prefix = self.get_nsprefix(qname[0]) - f.write(' '+_escape(str(prefix+':'+qname[1]))+'='+_quoteattr(unicode(self.attributes[qname]).encode('utf-8'))) + f.write(' '+_escape(str(prefix+':'+qname[1]))+'='+_quoteattr(type(u'')(self.attributes[qname]).encode('utf-8'))) f.write('>') def write_close_tag(self, level, f): @@ -483,7 +483,7 @@ class Element(Node): f.write(' xmlns:' + prefix + '="'+ _escape(str(namespace))+'"') for qname in self.attributes.keys(): prefix = self.get_nsprefix(qname[0]) - f.write(' '+_escape(str(prefix+':'+qname[1]))+'='+_quoteattr(unicode(self.attributes[qname]).encode('utf-8'))) + f.write(' '+_escape(str(prefix+':'+qname[1]))+'='+_quoteattr(type(u'')(self.attributes[qname]).encode('utf-8'))) if self.childNodes: f.write('>') for element in self.childNodes: @@ -509,5 +509,3 @@ class Element(Node): """ This is a check to see if the object is an instance of a type """ obj = element(check_grammar=False) return self.qname == obj.qname - - diff --git a/src/odf/odf2xhtml.py b/src/odf/odf2xhtml.py index 99b3279207..9f1e763551 100644 --- a/src/odf/odf2xhtml.py +++ b/src/odf/odf2xhtml.py @@ -55,6 +55,7 @@ if False: # Added by Kovid # character etc. styles Since CSS2 has no scope we use a prefix. (Not elegant) # In ODF a style can have a parent, these parents can be chained. + class StyleToCSS: """ The purpose of the StyleToCSS class is to contain the rules to convert @@ -317,6 +318,7 @@ class TagStack: if attr in attrs: return attrs[attr] return None + def count_tags(self, tag): c = 0 for ttag, tattrs in self.stack: @@ -324,6 +326,7 @@ class TagStack: c = c + 1 return c + special_styles = { 'S-Emphasis':'em', 'S-Citation':'cite', @@ -352,6 +355,8 @@ special_styles = { # ODFCONTENTHANDLER # # ----------------------------------------------------------------------------- + + class ODF2XHTML(handler.ContentHandler): """ The ODF2XHTML parses an ODF file and produces XHTML""" @@ -625,9 +630,6 @@ class ODF2XHTML(handler.ContentHandler): self.anchors[name] = "anchor%d" % (len(self.anchors) + 1) return self.anchors.get(name) - -# -------------------------------------------------- - def purgedata(self): self.data = [] @@ -1457,7 +1459,7 @@ dl.notes dd:last-of-type { page-break-after: avoid } # self.writeout( escape(mark) ) # Since HTML only knows about endnotes, there is too much risk that the # marker is reused in the source. Therefore we force numeric markers - self.writeout(unicode(self.currentnote)) + self.writeout(type(u'')(self.currentnote)) self.closetag('a') self.closetag('sup') @@ -1566,12 +1568,11 @@ dl.notes dd:last-of-type { page-break-after: avoid } self.writedata() self.purgedata() - -# ----------------------------------------------------------------------------- -# -# Reading the file -# -# ----------------------------------------------------------------------------- + # ----------------------------------------------------------------------------- + # + # Reading the file + # + # ----------------------------------------------------------------------------- def load(self, odffile): """ Loads a document into the parser and parses it. @@ -1593,7 +1594,7 @@ dl.notes dd:last-of-type { page-break-after: avoid } self._walknode(c) self.endElementNS(node.qname, node.tagName) if node.nodeType == Node.TEXT_NODE or node.nodeType == Node.CDATA_SECTION_NODE: - self.characters(unicode(node)) + self.characters(type(u'')(node)) def odf2xhtml(self, odffile): """ Load a file and return the XHTML diff --git a/src/tinycss/tests/__init__.py b/src/tinycss/tests/__init__.py index bbdfb51d47..3cf59a1077 100644 --- a/src/tinycss/tests/__init__.py +++ b/src/tinycss/tests/__init__.py @@ -8,10 +8,6 @@ __copyright__ = '2014, Kovid Goyal ' import unittest -try: - unicode -except NameError: - unicode = str def jsonify(tokens): """Turn tokens into "JSON-compatible" data structures.""" @@ -24,6 +20,7 @@ def jsonify(tokens): else: yield token.type, token.value + class BaseTest(unittest.TestCase): longMessage = True @@ -34,10 +31,8 @@ class BaseTest(unittest.TestCase): """Test not complete error messages but only substrings.""" self.ae(len(errors), len(expected_errors)) for error, expected in zip(errors, expected_errors): - self.assertIn(expected, unicode(error)) + self.assertIn(expected, type(u'')(error)) def jsonify_declarations(self, rule): return [(decl.name, list(jsonify(decl.value))) for decl in rule.declarations] - -