py3: Use unicode_literals and migrate str() in a few more files

This commit is contained in:
Kovid Goyal 2019-05-19 13:20:26 +05:30
parent 7f7c83a709
commit 052cb43ae1
No known key found for this signature in database
GPG Key ID: 06BC317B515ACE7C
3 changed files with 29 additions and 30 deletions

View File

@ -1,6 +1,6 @@
#!/usr/bin/env python2
from __future__ import print_function
from __future__ import print_function, unicode_literals
__license__ = 'GPL v3'
__copyright__ = '2008, Kovid Goyal <kovid at kovidgoyal.net>'
'''
@ -48,7 +48,7 @@ class Article(object):
print('Failed to process article summary, deleting:')
print(summary.encode('utf-8'))
traceback.print_exc()
summary = u''
summary = ''
self.text_summary = clean_ascii_chars(summary)
self.author = author
self.content = content
@ -83,7 +83,7 @@ class Article(object):
def __repr__(self):
return \
(u'''\
('''\
Title : %s
URL : %s
Author : %s
@ -93,7 +93,7 @@ TOC thumb : %s
Has content : %s
'''%(self.title, self.url, self.author, self.summary[:20]+'...',
self.localtime.strftime('%a, %d %b, %Y %H:%M'), self.toc_thumbnail,
bool(self.content))).encode('utf-8')
bool(self.content)))
def __str__(self):
return repr(self)
@ -208,7 +208,7 @@ class Feed(object):
content = [i.value for i in item.get('content', []) if i.value]
content = [i if isinstance(i, unicode_type) else i.decode('utf-8', 'replace')
for i in content]
content = u'\n'.join(content)
content = '\n'.join(content)
if not content.strip():
content = None
if not link and not content:
@ -286,8 +286,8 @@ class FeedCollection(list):
def __init__(self, feeds):
list.__init__(self, [f for f in feeds if len(f.articles) > 0])
found_articles = set([])
duplicates = set([])
found_articles = set()
duplicates = set()
def in_set(s, a):
for x in s:

View File

@ -1,4 +1,4 @@
from __future__ import with_statement
from __future__ import with_statement, unicode_literals
__license__ = 'GPL v3'
__copyright__ = '2008, Kovid Goyal <kovid at kovidgoyal.net>'
'''
@ -54,7 +54,7 @@ class BasicNewsRecipe(Recipe):
#: A couple of lines that describe the content this recipe downloads.
#: This will be used primarily in a GUI that presents a list of recipes.
description = u''
description = ''
#: The author of this recipe
__author__ = __appname__
@ -288,7 +288,7 @@ class BasicNewsRecipe(Recipe):
#: The CSS that is used to style the templates, i.e., the navigation bars and
#: the Tables of Contents. Rather than overriding this variable, you should
#: use `extra_css` in your recipe to customize look and feel.
template_css = u'''
template_css = '''
.article_date {
color: gray; font-family: monospace;
}
@ -446,7 +446,7 @@ class BasicNewsRecipe(Recipe):
so, override in your subclass.
'''
if not self.feeds:
raise NotImplementedError
raise NotImplementedError()
if self.test:
return self.feeds[:self.test[0]]
return self.feeds
@ -462,7 +462,7 @@ class BasicNewsRecipe(Recipe):
return url + '?&pagewanted=print'
'''
raise NotImplementedError
raise NotImplementedError()
@classmethod
def image_url_processor(cls, baseurl, url):
@ -665,7 +665,7 @@ class BasicNewsRecipe(Recipe):
`url_or_raw`: Either a URL or the downloaded index page as a string
'''
if re.match(r'\w+://', url_or_raw):
if re.match((br'\w+://' if isinstance(url_or_raw, bytes) else r'\w+://'), url_or_raw):
# We may be called in a thread (in the skip_ad_pages method), so
# clone the browser to be safe. We cannot use self.cloned_browser
# as it may or may not actually clone the browser, depending on if
@ -698,9 +698,7 @@ class BasicNewsRecipe(Recipe):
if as_tree:
from html5_parser import parse
return parse(_raw)
else:
return BeautifulSoup(_raw)
return parse(_raw, return_root=False)
return BeautifulSoup(_raw)
def extract_readable_article(self, html, url):
'''
@ -725,12 +723,12 @@ class BasicNewsRecipe(Recipe):
root = frag
elif frag.tag == 'body':
root = document_fromstring(
u'<html><head><title>%s</title></head></html>' %
'<html><head><title>%s</title></head></html>' %
extracted_title)
root.append(frag)
else:
root = document_fromstring(
u'<html><head><title>%s</title></head><body/></html>' %
'<html><head><title>%s</title></head><body/></html>' %
extracted_title)
root.xpath('//body')[0].append(frag)
@ -794,7 +792,7 @@ class BasicNewsRecipe(Recipe):
calibre show the user a simple message instead of an error, call
:meth:`abort_recipe_processing`.
'''
raise NotImplementedError
raise NotImplementedError()
def abort_recipe_processing(self, msg):
'''
@ -815,7 +813,7 @@ class BasicNewsRecipe(Recipe):
This method is typically useful for sites that try to make it difficult to
access article content automatically.
'''
raise NotImplementedError
raise NotImplementedError()
def add_toc_thumbnail(self, article, src):
'''
@ -902,9 +900,9 @@ class BasicNewsRecipe(Recipe):
self.css_map = {}
web2disk_cmdline = ['web2disk',
'--timeout', str(self.timeout),
'--max-recursions', str(self.recursions),
'--delay', str(self.delay),
'--timeout', unicode_type(self.timeout),
'--max-recursions', unicode_type(self.recursions),
'--delay', unicode_type(self.delay),
]
if self.verbose:
@ -1068,10 +1066,10 @@ class BasicNewsRecipe(Recipe):
src = force_unicode(src, 'utf-8')
pos = cls.summary_length
fuzz = 50
si = src.find(u';', pos)
si = src.find(';', pos)
if si > 0 and si-pos > fuzz:
si = -1
gi = src.find(u'>', pos)
gi = src.find('>', pos)
if gi > 0 and gi-pos > fuzz:
gi = -1
npos = max(si, gi)
@ -1081,7 +1079,7 @@ class BasicNewsRecipe(Recipe):
if len(ans) < len(src):
from calibre.utils.cleantext import clean_xml_chars
# Truncating the string could leave a dangling UTF-16 half-surrogate, which will cause lxml to barf, so clean it
ans = clean_xml_chars(ans) + u'\u2026'
ans = clean_xml_chars(ans) + '\u2026'
return ans
def feed2index(self, f, feeds):
@ -1590,7 +1588,7 @@ class BasicNewsRecipe(Recipe):
article.sub_pages = result[1][1:]
self.jobs_done += 1
self.report_progress(float(self.jobs_done)/len(self.jobs),
_(u'Article downloaded: %s')%force_unicode(article.title))
_('Article downloaded: %s')%force_unicode(article.title))
if result[2]:
self.partial_failures.append((request.feed.title, article.title, article.url, result[2]))
@ -1684,7 +1682,7 @@ class BasicNewsRecipe(Recipe):
strings.append(item['alt'])
except KeyError:
pass
ans = u''.join(strings)
ans = ''.join(strings)
if normalize_whitespace:
ans = re.sub(r'\s+', ' ', ans)
return ans

View File

@ -1,4 +1,5 @@
#!/usr/bin/env python2
from __future__ import unicode_literals
__license__ = 'GPL v3'
__copyright__ = '2008, Kovid Goyal <kovid at kovidgoyal.net>'
@ -36,13 +37,13 @@ class Template(object):
if isbytestring(kwargs[key]):
kwargs[key] = kwargs[key].decode('utf-8', 'replace')
if kwargs[key] is None:
kwargs[key] = u''
kwargs[key] = ''
args = list(args)
for i in range(len(args)):
if isbytestring(args[i]):
args[i] = args[i].decode('utf-8', 'replace')
if args[i] is None:
args[i] = u''
args[i] = ''
self._generate(*args, **kwargs)